// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "tctx.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"

struct io_cancel {
	struct file			*file;
	u64				addr;
	u32				flags;
	s32				fd;
	u8				opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
*/ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd) { bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA; if (req->ctx != cd->ctx) return false; if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP))) match_user_data = true; if (cd->flags & IORING_ASYNC_CANCEL_ANY) goto check_seq; if (cd->flags & IORING_ASYNC_CANCEL_FD) { if (req->file != cd->file) return false; } if (cd->flags & IORING_ASYNC_CANCEL_OP) { if (req->opcode != cd->opcode) return false; } if (match_user_data && req->cqe.user_data != cd->data) return false; if (cd->flags & IORING_ASYNC_CANCEL_ALL) { check_seq: if (cd->seq == req->work.cancel_seq) return false; req->work.cancel_seq = cd->seq; } return true; } static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_cancel_data *cd = data; return io_cancel_req_match(req, cd); } static int io_async_cancel_one(struct io_uring_task *tctx, struct io_cancel_data *cd) { enum io_wq_cancel cancel_ret; int ret = 0; bool all; if (!tctx || !tctx->io_wq) return -ENOENT; all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all); switch (cancel_ret) { case IO_WQ_CANCEL_OK: ret = 0; break; case IO_WQ_CANCEL_RUNNING: ret = -EALREADY; break; case IO_WQ_CANCEL_NOTFOUND: ret = -ENOENT; break; } return ret; } int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, unsigned issue_flags) { struct io_ring_ctx *ctx = cd->ctx; int ret; WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring); ret = io_async_cancel_one(tctx, cd); /* * Fall-through even for -EALREADY, as we may have poll armed * that need unarming. */ if (!ret) return 0; ret = io_poll_cancel(ctx, cd, issue_flags); if (ret != -ENOENT) return ret; ret = io_waitid_cancel(ctx, cd, issue_flags); if (ret != -ENOENT) return ret; ret = io_futex_cancel(ctx, cd, issue_flags); if (ret != -ENOENT) return ret; spin_lock(&ctx->completion_lock); if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) ret = io_timeout_cancel(ctx, cd); spin_unlock(&ctx->completion_lock); return ret; } int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel); if (unlikely(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; if (sqe->off || sqe->splice_fd_in) return -EINVAL; cancel->addr = READ_ONCE(sqe->addr); cancel->flags = READ_ONCE(sqe->cancel_flags); if (cancel->flags & ~CANCEL_FLAGS) return -EINVAL; if (cancel->flags & IORING_ASYNC_CANCEL_FD) { if (cancel->flags & IORING_ASYNC_CANCEL_ANY) return -EINVAL; cancel->fd = READ_ONCE(sqe->fd); } if (cancel->flags & IORING_ASYNC_CANCEL_OP) { if (cancel->flags & IORING_ASYNC_CANCEL_ANY) return -EINVAL; cancel->opcode = READ_ONCE(sqe->len); } return 0; } static int __io_async_cancel(struct io_cancel_data *cd, struct io_uring_task *tctx, unsigned int issue_flags) { bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); struct io_ring_ctx *ctx = cd->ctx; struct io_tctx_node *node; int ret, nr = 0; do { ret = io_try_cancel(tctx, cd, issue_flags); if (ret == -ENOENT) break; if (!all) return ret; nr++; } while (1); /* slow path, try all io-wq's */ io_ring_submit_lock(ctx, issue_flags); ret = -ENOENT; list_for_each_entry(node, &ctx->tctx_list, ctx_node) { struct io_uring_task *tctx = node->task->io_uring; ret = io_async_cancel_one(tctx, cd); if (ret != -ENOENT) { if (!all) break; nr++; } } 
io_ring_submit_unlock(ctx, issue_flags); return all ? nr : ret; } int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) { struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel); struct io_cancel_data cd = { .ctx = req->ctx, .data = cancel->addr, .flags = cancel->flags, .opcode = cancel->opcode, .seq = atomic_inc_return(&req->ctx->cancel_seq), }; struct io_uring_task *tctx = req->task->io_uring; int ret; if (cd.flags & IORING_ASYNC_CANCEL_FD) { if (req->flags & REQ_F_FIXED_FILE || cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) { req->flags |= REQ_F_FIXED_FILE; req->file = io_file_get_fixed(req, cancel->fd, issue_flags); } else { req->file = io_file_get_normal(req, cancel->fd); } if (!req->file) { ret = -EBADF; goto done; } cd.file = req->file; } ret = __io_async_cancel(&cd, tctx, issue_flags); done: if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } void init_hash_table(struct io_hash_table *table, unsigned size) { unsigned int i; for (i = 0; i < size; i++) { spin_lock_init(&table->hbs[i].lock); INIT_HLIST_HEAD(&table->hbs[i].list); } } static int __io_sync_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, int fd) { struct io_ring_ctx *ctx = cd->ctx; /* fixed must be grabbed every time since we drop the uring_lock */ if ((cd->flags & IORING_ASYNC_CANCEL_FD) && (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) { if (unlikely(fd >= ctx->nr_user_files)) return -EBADF; fd = array_index_nospec(fd, ctx->nr_user_files); cd->file = io_file_from_index(&ctx->file_table, fd); if (!cd->file) return -EBADF; } return __io_async_cancel(cd, tctx, 0); } int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) __must_hold(&ctx->uring_lock) { struct io_cancel_data cd = { .ctx = ctx, .seq = atomic_inc_return(&ctx->cancel_seq), }; ktime_t timeout = KTIME_MAX; struct io_uring_sync_cancel_reg sc; struct file *file = NULL; DEFINE_WAIT(wait); int ret, i; if (copy_from_user(&sc, arg, sizeof(sc))) return -EFAULT; if (sc.flags & ~CANCEL_FLAGS) return -EINVAL; for (i = 0; i < ARRAY_SIZE(sc.pad); i++) if (sc.pad[i]) return -EINVAL; for (i = 0; i < ARRAY_SIZE(sc.pad2); i++) if (sc.pad2[i]) return -EINVAL; cd.data = sc.addr; cd.flags = sc.flags; cd.opcode = sc.opcode; /* we can grab a normal file descriptor upfront */ if ((cd.flags & IORING_ASYNC_CANCEL_FD) && !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) { file = fget(sc.fd); if (!file) return -EBADF; cd.file = file; } ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); /* found something, done! */ if (ret != -EALREADY) goto out; if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) { struct timespec64 ts = { .tv_sec = sc.timeout.tv_sec, .tv_nsec = sc.timeout.tv_nsec }; timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); } /* * Keep looking until we get -ENOENT. we'll get woken everytime * every time a request completes and will retry the cancelation. */ do { cd.seq = atomic_inc_return(&ctx->cancel_seq); prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE); ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); mutex_unlock(&ctx->uring_lock); if (ret != -EALREADY) break; ret = io_run_task_work_sig(ctx); if (ret < 0) break; ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); if (!ret) { ret = -ETIME; break; } mutex_lock(&ctx->uring_lock); } while (1); finish_wait(&ctx->cq_wait, &wait); mutex_lock(&ctx->uring_lock); if (ret == -ENOENT || ret > 0) ret = 0; out: if (file) fput(file); return ret; } |
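A minimal user-space sketch of how this cancelation path is typically driven, assuming liburing 2.2 or newer for io_uring_prep_cancel64() and io_uring_sqe_set_data64() (the pipe, buffer and user_data values are illustrative only): a pending read is keyed by its user_data, and a later async-cancel SQE asks the kernel to match it, with 0, -ENOENT or -EALREADY surfacing in the cancel CQE just as io_try_cancel() returns them.

/* Hedged user-space sketch: cancel a pending read by user_data (liburing >= 2.2 assumed). */
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[64];
	int fds[2];

	if (pipe(fds) || io_uring_queue_init(8, &ring, 0))
		return 1;

	/* Queue a read that will not complete (nothing is written to the pipe). */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
	io_uring_sqe_set_data64(sqe, 0x1234);
	io_uring_submit(&ring);

	/* Ask the kernel to cancel it; matching is by user_data, like io_cancel_req_match(). */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_cancel64(sqe, 0x1234, 0);
	io_uring_sqe_set_data64(sqe, 0x5678);
	io_uring_submit(&ring);

	/* Reap both CQEs: the cancel result (0, -ENOENT or -EALREADY) and the -ECANCELED read. */
	for (int i = 0; i < 2; i++) {
		if (io_uring_wait_cqe(&ring, &cqe))
			break;
		printf("user_data=%llu res=%d (%s)\n",
		       (unsigned long long)cqe->user_data, cqe->res,
		       cqe->res < 0 ? strerror(-cqe->res) : "ok");
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}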
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_sock.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/flow_dissector.h>
#include <net/tc_wrapper.h>

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include
<net/netfilter/nf_conntrack.h> #endif struct flow_head { struct list_head filters; struct rcu_head rcu; }; struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct timer_list perturb_timer; u32 perturb_period; u32 handle; u32 nkeys; u32 keymask; u32 mode; u32 mask; u32 xor; u32 rshift; u32 addend; u32 divisor; u32 baseclass; u32 hashrnd; struct rcu_work rwork; }; static inline u32 addr_fold(void *addr) { unsigned long a = (unsigned long)addr; return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); } static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 src = flow_get_u32_src(flow); if (src) return ntohl(src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { __be32 dst = flow_get_u32_dst(flow); if (dst) return ntohl(dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) { return skb->skb_iif; } static u32 flow_get_priority(const struct sk_buff *skb) { return skb->priority; } static u32 flow_get_mark(const struct sk_buff *skb) { return skb->mark; } static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb_nfct(skb)); #else return 0; #endif } #if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ if (ct == NULL) \ goto fallback; \ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ }) #else #define CTTUPLE(skb, member) \ ({ \ goto fallback; \ 0; \ }) #endif static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: return flow_get_src(skb, flow); } static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: return flow_get_dst(skb, flow); } static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: return flow_get_proto_dst(skb, flow); } static u32 flow_get_rtclassid(const struct sk_buff *skb) { #ifdef CONFIG_IP_ROUTE_CLASSID if (skb_dst(skb)) return skb_dst(skb)->tclassid; #endif return 0; } static u32 flow_get_skuid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kuid_t skuid = sk->sk_socket->file->f_cred->fsuid; return from_kuid(&init_user_ns, skuid); } return 0; } 
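The CTTUPLE() macro above leans on a GCC statement expression that may "goto fallback" in its caller when no conntrack entry is attached. A stripped-down, hedged illustration of that pattern follows; demo_entry and demo_lookup() are invented stand-ins, not kernel APIs.

/* Hedged demo of the CTTUPLE() pattern: a statement-expression macro that
 * bails out to a caller-provided 'fallback:' label. 'demo_entry' and
 * 'demo_lookup()' are illustrative stand-ins, not kernel APIs.
 */
#include <stdio.h>

struct demo_entry { unsigned int value; };

static struct demo_entry *demo_lookup(int key)
{
	static struct demo_entry hit = { .value = 42 };

	return key ? &hit : NULL;	/* NULL means "no entry", like a missing conntrack */
}

#define DEMO_VALUE(key)					\
({							\
	struct demo_entry *e = demo_lookup(key);	\
	if (e == NULL)					\
		goto fallback;				\
	e->value;					\
})

static unsigned int get_value(int key)
{
	return DEMO_VALUE(key);
fallback:
	return 0;	/* same shape as flow_get_nfct_*() falling back to flow_get_*() */
}

int main(void)
{
	printf("%u %u\n", get_value(1), get_value(0));	/* prints "42 0" */
	return 0;
}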
static u32 flow_get_skgid(const struct sk_buff *skb) { struct sock *sk = skb_to_full_sk(skb); if (sk && sk->sk_socket && sk->sk_socket->file) { kgid_t skgid = sk->sk_socket->file->f_cred->fsgid; return from_kgid(&init_user_ns, skgid); } return 0; } static u32 flow_get_vlan_tag(const struct sk_buff *skb) { u16 tag; if (vlan_get_tag(skb, &tag) < 0) return 0; return tag & VLAN_VID_MASK; } static u32 flow_get_rxhash(struct sk_buff *skb) { return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) { switch (key) { case FLOW_KEY_SRC: return flow_get_src(skb, flow); case FLOW_KEY_DST: return flow_get_dst(skb, flow); case FLOW_KEY_PROTO: return flow_get_proto(skb, flow); case FLOW_KEY_PROTO_SRC: return flow_get_proto_src(skb, flow); case FLOW_KEY_PROTO_DST: return flow_get_proto_dst(skb, flow); case FLOW_KEY_IIF: return flow_get_iif(skb); case FLOW_KEY_PRIORITY: return flow_get_priority(skb); case FLOW_KEY_MARK: return flow_get_mark(skb); case FLOW_KEY_NFCT: return flow_get_nfct(skb); case FLOW_KEY_NFCT_SRC: return flow_get_nfct_src(skb, flow); case FLOW_KEY_NFCT_DST: return flow_get_nfct_dst(skb, flow); case FLOW_KEY_NFCT_PROTO_SRC: return flow_get_nfct_proto_src(skb, flow); case FLOW_KEY_NFCT_PROTO_DST: return flow_get_nfct_proto_dst(skb, flow); case FLOW_KEY_RTCLASSID: return flow_get_rtclassid(skb); case FLOW_KEY_SKUID: return flow_get_skuid(skb); case FLOW_KEY_SKGID: return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); case FLOW_KEY_RXHASH: return flow_get_rxhash(skb); default: WARN_ON(1); return 0; } } #define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ (1 << FLOW_KEY_DST) | \ (1 << FLOW_KEY_PROTO) | \ (1 << FLOW_KEY_PROTO_SRC) | \ (1 << FLOW_KEY_PROTO_DST) | \ (1 << FLOW_KEY_NFCT_SRC) | \ (1 << FLOW_KEY_NFCT_DST) | \ (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ (1 << FLOW_KEY_NFCT_PROTO_DST)) TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct flow_head *head = rcu_dereference_bh(tp->root); struct flow_filter *f; u32 keymask; u32 classid; unsigned int n, key; int r; list_for_each_entry_rcu(f, &head->filters, list) { u32 keys[FLOW_KEY_MAX + 1]; struct flow_keys flow_keys; if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) skb_flow_dissect_flow_keys(skb, &flow_keys, 0); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; keymask &= ~(1 << key); keys[n] = flow_key_get(skb, key, &flow_keys); } if (f->mode == FLOW_MODE_HASH) classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; classid = (classid >> f->rshift) + f->addend; } if (f->divisor) classid %= f->divisor; res->class = 0; res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } return -1; } static void flow_perturbation(struct timer_list *t) { struct flow_filter *f = from_timer(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) mod_timer(&f->perturb_timer, jiffies + f->perturb_period); } static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, [TCA_FLOW_ACT] 
= { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static void __flow_destroy_filter(struct flow_filter *f) { timer_shutdown_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); kfree(f); } static void flow_destroy_filter_work(struct work_struct *work) { struct flow_filter *f = container_of(to_rcu_work(work), struct flow_filter, rwork); rtnl_lock(); __flow_destroy_filter(f); rtnl_unlock(); } static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; int err; if (opt == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); if (err < 0) return err; if (tb[TCA_FLOW_BASECLASS]) { baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); if (TC_H_MIN(baseclass) == 0) return -EINVAL; } if (tb[TCA_FLOW_KEYS]) { keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); nkeys = hweight32(keymask); if (nkeys == 0) return -EINVAL; if (fls(keymask) - 1 > FLOW_KEY_MAX) return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); if (!fnew) return -ENOBUFS; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE); if (err < 0) goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, extack); if (err < 0) goto err2; fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; fnew->handle = fold->handle; fnew->nkeys = fold->nkeys; fnew->keymask = fold->keymask; fnew->mode = fold->mode; fnew->mask = fold->mask; fnew->xor = fold->xor; fnew->rshift = fold->rshift; fnew->addend = fold->addend; fnew->divisor = fold->divisor; fnew->baseclass = fold->baseclass; fnew->hashrnd = fold->hashrnd; mode = fold->mode; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) goto err2; if (!tb[TCA_FLOW_KEYS]) goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } if (TC_H_MAJ(baseclass) == 0) { struct Qdisc *q = tcf_block_q(tp->chain->block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); fnew->handle = handle; fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); } timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); tcf_block_netif_keep_dst(tp->chain->block); if (tb[TCA_FLOW_KEYS]) { fnew->keymask = keymask; 
fnew->nkeys = nkeys; } fnew->mode = mode; if (tb[TCA_FLOW_MASK]) fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]); if (tb[TCA_FLOW_XOR]) fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]); if (tb[TCA_FLOW_RSHIFT]) fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); if (tb[TCA_FLOW_ADDEND]) fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); if (tb[TCA_FLOW_DIVISOR]) fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); if (baseclass) fnew->baseclass = baseclass; fnew->perturb_period = perturb_period; if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); *arg = fnew; if (fold) { tcf_exts_get_net(&fold->exts); tcf_queue_work(&fold->rwork, flow_destroy_filter_work); } return 0; err2: tcf_exts_destroy(&fnew->exts); tcf_em_tree_destroy(&fnew->ematches); err1: kfree(fnew); return err; } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; list_del_rcu(&f->list); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, flow_destroy_filter_work); *last = list_empty(&head->filters); return 0; } static int flow_init(struct tcf_proto *tp) { struct flow_head *head; head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->filters); rcu_assign_pointer(tp->root, head); return 0; } static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, flow_destroy_filter_work); else __flow_destroy_filter(f); } kfree_rcu(head, rcu); } static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) return f; return NULL; } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) return skb->len; t->tcm_handle = f->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) || nla_put_u32(skb, TCA_FLOW_MODE, f->mode)) goto nla_put_failure; if (f->mask != ~0 || f->xor != 0) { if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) || nla_put_u32(skb, TCA_FLOW_XOR, f->xor)) goto nla_put_failure; } if (f->rshift && nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift)) goto nla_put_failure; if (f->addend && nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend)) goto nla_put_failure; if (f->divisor && nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor)) goto nla_put_failure; if (f->baseclass && nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass)) goto nla_put_failure; if (f->perturb_period && nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH if (f->ematches.hdr.nmatches && tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) goto nla_put_failure; #endif nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static void 
flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) { if (!tc_cls_stats_dump(tp, arg, f)) break; } } static struct tcf_proto_ops cls_flow_ops __read_mostly = { .kind = "flow", .classify = flow_classify, .init = flow_init, .destroy = flow_destroy, .change = flow_change, .delete = flow_delete, .get = flow_get, .dump = flow_dump, .walk = flow_walk, .owner = THIS_MODULE, }; static int __init cls_flow_init(void) { return register_tcf_proto_ops(&cls_flow_ops); } static void __exit cls_flow_exit(void) { unregister_tcf_proto_ops(&cls_flow_ops); } module_init(cls_flow_init); module_exit(cls_flow_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("TC flow classifier"); |
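To make the FLOW_MODE_MAP branch of flow_classify() concrete, here is a hedged user-space restatement of the mask/xor/rshift/addend/divisor arithmetic; DEMO_TC_H_MAKE mirrors the kernel's TC_H_MAKE() locally and the parameter values are arbitrary examples, not recommended settings.

/* Hedged sketch of the FLOW_MODE_MAP classid computation from flow_classify();
 * TC_H_MAKE is reproduced locally, parameter values are arbitrary examples.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_TC_H_MAKE(maj, min) (((maj) & 0xFFFF0000U) | ((min) & 0x0000FFFFU))

static uint32_t map_classid(uint32_t key, uint32_t mask, uint32_t xor_val,
			    uint32_t rshift, uint32_t addend,
			    uint32_t divisor, uint32_t baseclass)
{
	uint32_t classid = key;

	classid = (classid & mask) ^ xor_val;
	classid = (classid >> rshift) + addend;
	if (divisor)
		classid %= divisor;

	return DEMO_TC_H_MAKE(baseclass, baseclass + classid);
}

int main(void)
{
	/* e.g. take the low byte of a key and spread flows over 16 classes under 1: */
	uint32_t res = map_classid(0x12345678, 0xff, 0, 0, 0, 16, 0x00010001);

	printf("classid %x:%x\n", (unsigned)(res >> 16), (unsigned)(res & 0xffff));
	return 0;
}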
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  net/dccp/diag.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@mandriva.com>
 */

#include <linux/module.h>
#include <linux/inet_diag.h>

#include "ccid.h"
#include "dccp.h"

static void dccp_get_info(struct sock *sk, struct tcp_info *info)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);

	memset(info, 0, sizeof(*info));

	info->tcpi_state	= sk->sk_state;
	info->tcpi_retransmits	= icsk->icsk_retransmits;
	info->tcpi_probes	= icsk->icsk_probes_out;
	info->tcpi_backoff	= icsk->icsk_backoff;
	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;

	if (dp->dccps_hc_rx_ackvec != NULL)
		info->tcpi_options |= TCPI_OPT_SACK;

	if (dp->dccps_hc_rx_ccid != NULL)
		ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);

	if (dp->dccps_hc_tx_ccid != NULL)
		ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
}

static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			       void *_info)
{
	r->idiag_rqueue = r->idiag_wqueue = 0;

	if (_info != NULL)
		dccp_get_info(sk, _info);
}

static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			   const struct inet_diag_req_v2 *r)
{
	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r);
}

static int dccp_diag_dump_one(struct netlink_callback *cb,
			      const struct inet_diag_req_v2 *req)
{
	return inet_diag_dump_one_icsk(&dccp_hashinfo, cb, req);
}

static const struct inet_diag_handler dccp_diag_handler = {
	.dump		 = dccp_diag_dump,
	.dump_one	 = dccp_diag_dump_one,
	.idiag_get_info	 = dccp_diag_get_info,
	.idiag_type	 = IPPROTO_DCCP,
	.idiag_info_size = sizeof(struct tcp_info),
};

static int __init dccp_diag_init(void)
{
	return inet_diag_register(&dccp_diag_handler);
}

static void __exit dccp_diag_fini(void)
{
	inet_diag_unregister(&dccp_diag_handler);
}

module_init(dccp_diag_init);
module_exit(dccp_diag_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCP inet_diag handler");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */);
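A hedged user-space sketch of how this handler is exercised: a NETLINK_SOCK_DIAG dump request with sdiag_protocol set to IPPROTO_DCCP reaches dccp_diag_dump(), and requesting INET_DIAG_INFO makes dccp_diag_get_info() fill the tcp_info-shaped attribute. Error handling is minimal and reply parsing is omitted.

/* Hedged sketch: request a dump of DCCP sockets via NETLINK_SOCK_DIAG (reply parsing omitted). */
#include <linux/inet_diag.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg;
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0)
		return 1;

	memset(&msg, 0, sizeof(msg));
	msg.nlh.nlmsg_len = sizeof(msg);
	msg.nlh.nlmsg_type = SOCK_DIAG_BY_FAMILY;
	msg.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	msg.req.sdiag_family = AF_INET;
	msg.req.sdiag_protocol = IPPROTO_DCCP;		/* routed to dccp_diag_handler */
	msg.req.idiag_states = ~0U;			/* all socket states */
	msg.req.idiag_ext = 1 << (INET_DIAG_INFO - 1);	/* ask for the tcp_info blob */

	if (sendto(fd, &msg, sizeof(msg), 0,
		   (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
		perror("sendto");
	else
		printf("request sent; recv() would return inet_diag_msg records\n");

	close(fd);
	return 0;
}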
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
 * include/uapi/linux/tipc_config.h: Header for TIPC configuration interface
 *
 * Copyright (c) 2003-2006, Ericsson AB
 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/ #ifndef _LINUX_TIPC_CONFIG_H_ #define _LINUX_TIPC_CONFIG_H_ #include <linux/types.h> #include <linux/string.h> #include <linux/tipc.h> #include <asm/byteorder.h> /* * Configuration * * All configuration management messaging involves sending a request message * to the TIPC configuration service on a node, which sends a reply message * back. (In the future multi-message replies may be supported.) * * Both request and reply messages consist of a transport header and payload. * The transport header contains info about the desired operation; * the payload consists of zero or more type/length/value (TLV) items * which specify parameters or results for the operation. * * For many operations, the request and reply messages have a fixed number * of TLVs (usually zero or one); however, some reply messages may return * a variable number of TLVs. A failed request is denoted by the presence * of an "error string" TLV in the reply message instead of the TLV(s) the * reply should contain if the request succeeds. */ /* * Public commands: * May be issued by any process. * Accepted by own node, or by remote node only if remote management enabled. */ #define TIPC_CMD_NOOP 0x0000 /* tx none, rx none */ #define TIPC_CMD_GET_NODES 0x0001 /* tx net_addr, rx node_info(s) */ #define TIPC_CMD_GET_MEDIA_NAMES 0x0002 /* tx none, rx media_name(s) */ #define TIPC_CMD_GET_BEARER_NAMES 0x0003 /* tx none, rx bearer_name(s) */ #define TIPC_CMD_GET_LINKS 0x0004 /* tx net_addr, rx link_info(s) */ #define TIPC_CMD_SHOW_NAME_TABLE 0x0005 /* tx name_tbl_query, rx ultra_string */ #define TIPC_CMD_SHOW_PORTS 0x0006 /* tx none, rx ultra_string */ #define TIPC_CMD_SHOW_LINK_STATS 0x000B /* tx link_name, rx ultra_string */ #define TIPC_CMD_SHOW_STATS 0x000F /* tx unsigned, rx ultra_string */ /* * Protected commands: * May only be issued by "network administration capable" process. * Accepted by own node, or by remote node only if remote management enabled * and this node is zone manager. */ #define TIPC_CMD_GET_REMOTE_MNG 0x4003 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_PORTS 0x4004 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_PUBL 0x4005 /* obsoleted */ #define TIPC_CMD_GET_MAX_SUBSCR 0x4006 /* obsoleted */ #define TIPC_CMD_GET_MAX_ZONES 0x4007 /* obsoleted */ #define TIPC_CMD_GET_MAX_CLUSTERS 0x4008 /* obsoleted */ #define TIPC_CMD_GET_MAX_NODES 0x4009 /* obsoleted */ #define TIPC_CMD_GET_MAX_SLAVES 0x400A /* obsoleted */ #define TIPC_CMD_GET_NETID 0x400B /* tx none, rx unsigned */ #define TIPC_CMD_ENABLE_BEARER 0x4101 /* tx bearer_config, rx none */ #define TIPC_CMD_DISABLE_BEARER 0x4102 /* tx bearer_name, rx none */ #define TIPC_CMD_SET_LINK_TOL 0x4107 /* tx link_config, rx none */ #define TIPC_CMD_SET_LINK_PRI 0x4108 /* tx link_config, rx none */ #define TIPC_CMD_SET_LINK_WINDOW 0x4109 /* tx link_config, rx none */ #define TIPC_CMD_SET_LOG_SIZE 0x410A /* obsoleted */ #define TIPC_CMD_DUMP_LOG 0x410B /* obsoleted */ #define TIPC_CMD_RESET_LINK_STATS 0x410C /* tx link_name, rx none */ /* * Private commands: * May only be issued by "network administration capable" process. * Accepted by own node only; cannot be used on a remote node. 
*/ #define TIPC_CMD_SET_NODE_ADDR 0x8001 /* tx net_addr, rx none */ #define TIPC_CMD_SET_REMOTE_MNG 0x8003 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PORTS 0x8004 /* tx unsigned, rx none */ #define TIPC_CMD_SET_MAX_PUBL 0x8005 /* obsoleted */ #define TIPC_CMD_SET_MAX_SUBSCR 0x8006 /* obsoleted */ #define TIPC_CMD_SET_MAX_ZONES 0x8007 /* obsoleted */ #define TIPC_CMD_SET_MAX_CLUSTERS 0x8008 /* obsoleted */ #define TIPC_CMD_SET_MAX_NODES 0x8009 /* obsoleted */ #define TIPC_CMD_SET_MAX_SLAVES 0x800A /* obsoleted */ #define TIPC_CMD_SET_NETID 0x800B /* tx unsigned, rx none */ /* * Reserved commands: * May not be issued by any process. * Used internally by TIPC. */ #define TIPC_CMD_NOT_NET_ADMIN 0xC001 /* tx none, rx none */ /* * TLV types defined for TIPC */ #define TIPC_TLV_NONE 0 /* no TLV present */ #define TIPC_TLV_VOID 1 /* empty TLV (0 data bytes)*/ #define TIPC_TLV_UNSIGNED 2 /* 32-bit integer */ #define TIPC_TLV_STRING 3 /* char[128] (max) */ #define TIPC_TLV_LARGE_STRING 4 /* char[2048] (max) */ #define TIPC_TLV_ULTRA_STRING 5 /* char[32768] (max) */ #define TIPC_TLV_ERROR_STRING 16 /* char[128] containing "error code" */ #define TIPC_TLV_NET_ADDR 17 /* 32-bit integer denoting <Z.C.N> */ #define TIPC_TLV_MEDIA_NAME 18 /* char[TIPC_MAX_MEDIA_NAME] */ #define TIPC_TLV_BEARER_NAME 19 /* char[TIPC_MAX_BEARER_NAME] */ #define TIPC_TLV_LINK_NAME 20 /* char[TIPC_MAX_LINK_NAME] */ #define TIPC_TLV_NODE_INFO 21 /* struct tipc_node_info */ #define TIPC_TLV_LINK_INFO 22 /* struct tipc_link_info */ #define TIPC_TLV_BEARER_CONFIG 23 /* struct tipc_bearer_config */ #define TIPC_TLV_LINK_CONFIG 24 /* struct tipc_link_config */ #define TIPC_TLV_NAME_TBL_QUERY 25 /* struct tipc_name_table_query */ #define TIPC_TLV_PORT_REF 26 /* 32-bit port reference */ /* * Link priority limits (min, default, max, media default) */ #define TIPC_MIN_LINK_PRI 0 #define TIPC_DEF_LINK_PRI 10 #define TIPC_MAX_LINK_PRI 31 #define TIPC_MEDIA_LINK_PRI (TIPC_MAX_LINK_PRI + 1) /* * Link tolerance limits (min, default, max), in ms */ #define TIPC_MIN_LINK_TOL 50 #define TIPC_DEF_LINK_TOL 1500 #define TIPC_MAX_LINK_TOL 30000 #if (TIPC_MIN_LINK_TOL < 16) #error "TIPC_MIN_LINK_TOL is too small (abort limit may be NaN)" #endif /* * Link window limits (min, default, max), in packets */ #define TIPC_MIN_LINK_WIN 16 #define TIPC_DEF_LINK_WIN 50 #define TIPC_MAX_LINK_WIN 8191 /* * Default MTU for UDP media */ #define TIPC_DEF_LINK_UDP_MTU 14000 struct tipc_node_info { __be32 addr; /* network address of node */ __be32 up; /* 0=down, 1= up */ }; struct tipc_link_info { __be32 dest; /* network address of peer node */ __be32 up; /* 0=down, 1=up */ char str[TIPC_MAX_LINK_NAME]; /* link name */ }; struct tipc_bearer_config { __be32 priority; /* Range [1,31]. Override per link */ __be32 disc_domain; /* <Z.C.N> describing desired nodes */ char name[TIPC_MAX_BEARER_NAME]; }; struct tipc_link_config { __be32 value; char name[TIPC_MAX_LINK_NAME]; }; #define TIPC_NTQ_ALLTYPES 0x80000000 struct tipc_name_table_query { __be32 depth; /* 1:type, 2:+name info, 3:+port info, 4+:+debug info */ __be32 type; /* {t,l,u} info ignored if high bit of "depth" is set */ __be32 lowbound; /* (i.e. displays all entries of name table) */ __be32 upbound; }; /* * The error string TLV is a null-terminated string describing the cause * of the request failure. To simplify error processing (and to save space) * the first character of the string can be a special error code character * (lying by the range 0x80 to 0xFF) which represents a pre-defined reason. 
*/ #define TIPC_CFG_TLV_ERROR "\x80" /* request contains incorrect TLV(s) */ #define TIPC_CFG_NOT_NET_ADMIN "\x81" /* must be network administrator */ #define TIPC_CFG_NOT_ZONE_MSTR "\x82" /* must be zone master */ #define TIPC_CFG_NO_REMOTE "\x83" /* remote management not enabled */ #define TIPC_CFG_NOT_SUPPORTED "\x84" /* request is not supported by TIPC */ #define TIPC_CFG_INVALID_VALUE "\x85" /* request has invalid argument value */ /* * A TLV consists of a descriptor, followed by the TLV value. * TLV descriptor fields are stored in network byte order; * TLV values must also be stored in network byte order (where applicable). * TLV descriptors must be aligned to addresses which are multiple of 4, * so up to 3 bytes of padding may exist at the end of the TLV value area. * There must not be any padding between the TLV descriptor and its value. */ struct tlv_desc { __be16 tlv_len; /* TLV length (descriptor + value) */ __be16 tlv_type; /* TLV identifier */ }; #define TLV_ALIGNTO 4 #define TLV_ALIGN(datalen) (((datalen)+(TLV_ALIGNTO-1)) & ~(TLV_ALIGNTO-1)) #define TLV_LENGTH(datalen) (sizeof(struct tlv_desc) + (datalen)) #define TLV_SPACE(datalen) (TLV_ALIGN(TLV_LENGTH(datalen))) #define TLV_DATA(tlv) ((void *)((char *)(tlv) + TLV_LENGTH(0))) static inline int TLV_OK(const void *tlv, __u16 space) { /* * Would also like to check that "tlv" is a multiple of 4, * but don't know how to do this in a portable way. * - Tried doing (!(tlv & (TLV_ALIGNTO-1))), but GCC compiler * won't allow binary "&" with a pointer. * - Tried casting "tlv" to integer type, but causes warning about size * mismatch when pointer is bigger than chosen type (int, long, ...). */ return (space >= TLV_SPACE(0)) && (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; int tlv_len; tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; tlv_ptr->tlv_type = __cpu_to_be16(type); tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); } return TLV_SPACE(len); } /* * A TLV list descriptor simplifies processing of messages * containing multiple TLVs. 
*/ struct tlv_list_desc { struct tlv_desc *tlv_ptr; /* ptr to current TLV */ __u32 tlv_space; /* # bytes from curr TLV to list end */ }; static inline void TLV_LIST_INIT(struct tlv_list_desc *list, void *data, __u32 space) { list->tlv_ptr = (struct tlv_desc *)data; list->tlv_space = space; } static inline int TLV_LIST_EMPTY(struct tlv_list_desc *list) { return (list->tlv_space == 0); } static inline int TLV_LIST_CHECK(struct tlv_list_desc *list, __u16 exp_type) { return TLV_CHECK(list->tlv_ptr, list->tlv_space, exp_type); } static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) { return TLV_DATA(list->tlv_ptr); } static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; } /* * Configuration messages exchanged via NETLINK_GENERIC use the following * family id, name, version and command. */ #define TIPC_GENL_NAME "TIPC" #define TIPC_GENL_VERSION 0x1 #define TIPC_GENL_CMD 0x1 /* * TIPC specific header used in NETLINK_GENERIC requests. */ struct tipc_genlmsghdr { __u32 dest; /* Destination address */ __u16 cmd; /* Command */ __u16 reserved; /* Unused */ }; #define TIPC_GENL_HDRLEN NLMSG_ALIGN(sizeof(struct tipc_genlmsghdr)) /* * Configuration messages exchanged via TIPC sockets use the TIPC configuration * message header, which is defined below. This structure is analogous * to the Netlink message header, but fields are stored in network byte order * and no padding is permitted between the header and the message data * that follows. */ struct tipc_cfg_msg_hdr { __be32 tcm_len; /* Message length (including header) */ __be16 tcm_type; /* Command type */ __be16 tcm_flags; /* Additional flags */ char tcm_reserved[8]; /* Unused */ }; #define TCM_F_REQUEST 0x1 /* Flag: Request message */ #define TCM_F_MORE 0x2 /* Flag: Message to be continued */ #define TCM_ALIGN(datalen) (((datalen)+3) & ~3) #define TCM_LENGTH(datalen) (sizeof(struct tipc_cfg_msg_hdr) + datalen) #define TCM_SPACE(datalen) (TCM_ALIGN(TCM_LENGTH(datalen))) #define TCM_DATA(tcm_hdr) ((void *)((char *)(tcm_hdr) + TCM_LENGTH(0))) static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, void *data, __u16 data_len) { struct tipc_cfg_msg_hdr *tcm_hdr; int msg_len; msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; tcm_hdr->tcm_len = __cpu_to_be32(msg_len); tcm_hdr->tcm_type = __cpu_to_be16(cmd); tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); } return TCM_SPACE(data_len); } #endif |
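Because the header is self-contained, the TLV helpers above can be exercised directly from user space; a hedged sketch that builds one TIPC_TLV_NET_ADDR TLV and walks it back with the tlv_list_desc iterator (buffer size and address value are arbitrary):

/* Hedged usage sketch for the TLV helpers: build one TIPC_TLV_NET_ADDR TLV
 * and walk it back with the tlv_list_desc iterator. Buffer size is arbitrary.
 */
#include <linux/tipc_config.h>
#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
	char buf[64];
	__be32 addr = htonl(0x01001001);	/* example <Z.C.N> network address */
	struct tlv_list_desc list;
	int len;

	/* Writer side: descriptor + value, padded to a 4-byte boundary. */
	len = TLV_SET(buf, TIPC_TLV_NET_ADDR, &addr, sizeof(addr));
	printf("TLV space used: %d bytes\n", len);

	/* Reader side: validate type/length before touching the value. */
	TLV_LIST_INIT(&list, buf, len);
	if (!TLV_LIST_EMPTY(&list) && TLV_LIST_CHECK(&list, TIPC_TLV_NET_ADDR)) {
		__be32 *val = TLV_LIST_DATA(&list);

		printf("net addr 0x%x\n", (unsigned)ntohl(*val));
		TLV_LIST_STEP(&list);
	}
	return 0;
}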
/* BNEP implementation for Linux Bluetooth stack (BlueZ).
Copyright (C) 2001-2002 Inventel Systemes Written 2001-2002 by Clément Moreau <clement.moreau@inventel.fr> David Libault <david.libault@inventel.fr> Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #include <linux/module.h> #include <linux/kthread.h> #include <linux/file.h> #include <linux/etherdevice.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/hci_core.h> #include "bnep.h" #define VERSION "1.3" static bool compress_src = true; static bool compress_dst = true; static LIST_HEAD(bnep_session_list); static DECLARE_RWSEM(bnep_session_sem); static struct bnep_session *__bnep_get_session(u8 *dst) { struct bnep_session *s; BT_DBG(""); list_for_each_entry(s, &bnep_session_list, list) if (ether_addr_equal(dst, s->eh.h_source)) return s; return NULL; } static void __bnep_link_session(struct bnep_session *s) { list_add(&s->list, &bnep_session_list); } static void __bnep_unlink_session(struct bnep_session *s) { list_del(&s->list); } static int bnep_send(struct bnep_session *s, void *data, size_t len) { struct socket *sock = s->sock; struct kvec iv = { data, len }; return kernel_sendmsg(sock, &s->msg, &iv, 1, len); } static int bnep_send_rsp(struct bnep_session *s, u8 ctrl, u16 resp) { struct bnep_control_rsp rsp; rsp.type = BNEP_CONTROL; rsp.ctrl = ctrl; rsp.resp = htons(resp); return bnep_send(s, &rsp, sizeof(rsp)); } #ifdef CONFIG_BT_BNEP_PROTO_FILTER static inline void bnep_set_default_proto_filter(struct bnep_session *s) { /* (IPv4, ARP) */ s->proto_filter[0].start = ETH_P_IP; s->proto_filter[0].end = ETH_P_ARP; /* (RARP, AppleTalk) */ s->proto_filter[1].start = ETH_P_RARP; s->proto_filter[1].end = ETH_P_AARP; /* (IPX, IPv6) */ s->proto_filter[2].start = ETH_P_IPX; s->proto_filter[2].end = ETH_P_IPV6; } #endif static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len) { int n; if (len < 2) return -EILSEQ; n = get_unaligned_be16(data); data++; len -= 2; if (len < n) return -EILSEQ; BT_DBG("filter len %d", n); #ifdef CONFIG_BT_BNEP_PROTO_FILTER n /= 4; if (n <= BNEP_MAX_PROTO_FILTERS) { struct bnep_proto_filter *f = s->proto_filter; int i; for (i = 0; i < n; i++) { f[i].start = get_unaligned_be16(data++); f[i].end = get_unaligned_be16(data++); BT_DBG("proto filter start %u end %u", f[i].start, f[i].end); } if (i < BNEP_MAX_PROTO_FILTERS) memset(f + i, 0, sizeof(*f)); if (n == 0) bnep_set_default_proto_filter(s); bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_SUCCESS); } else { bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_LIMIT_REACHED); } #else bnep_send_rsp(s, 
BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_UNSUPPORTED_REQ); #endif return 0; } static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len) { int n; if (len < 2) return -EILSEQ; n = get_unaligned_be16(data); data += 2; len -= 2; if (len < n) return -EILSEQ; BT_DBG("filter len %d", n); #ifdef CONFIG_BT_BNEP_MC_FILTER n /= (ETH_ALEN * 2); if (n > 0) { int i; s->mc_filter = 0; /* Always send broadcast */ set_bit(bnep_mc_hash(s->dev->broadcast), (ulong *) &s->mc_filter); /* Add address ranges to the multicast hash */ for (; n > 0; n--) { u8 a1[6], *a2; memcpy(a1, data, ETH_ALEN); data += ETH_ALEN; a2 = data; data += ETH_ALEN; BT_DBG("mc filter %pMR -> %pMR", a1, a2); /* Iterate from a1 to a2 */ set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); while (memcmp(a1, a2, 6) < 0 && s->mc_filter != ~0LL) { /* Increment a1 */ i = 5; while (i >= 0 && ++a1[i--] == 0) ; set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); } } } BT_DBG("mc filter hash 0x%llx", s->mc_filter); bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_SUCCESS); #else bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_FILTER_UNSUPPORTED_REQ); #endif return 0; } static int bnep_rx_control(struct bnep_session *s, void *data, int len) { u8 cmd = *(u8 *)data; int err = 0; data++; len--; switch (cmd) { case BNEP_CMD_NOT_UNDERSTOOD: case BNEP_SETUP_CONN_RSP: case BNEP_FILTER_NET_TYPE_RSP: case BNEP_FILTER_MULTI_ADDR_RSP: /* Ignore these for now */ break; case BNEP_FILTER_NET_TYPE_SET: err = bnep_ctrl_set_netfilter(s, data, len); break; case BNEP_FILTER_MULTI_ADDR_SET: err = bnep_ctrl_set_mcfilter(s, data, len); break; case BNEP_SETUP_CONN_REQ: /* Successful response should be sent only once */ if (test_bit(BNEP_SETUP_RESPONSE, &s->flags) && !test_and_set_bit(BNEP_SETUP_RSP_SENT, &s->flags)) err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_SUCCESS); else err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_CONN_NOT_ALLOWED); break; default: { u8 pkt[3]; pkt[0] = BNEP_CONTROL; pkt[1] = BNEP_CMD_NOT_UNDERSTOOD; pkt[2] = cmd; err = bnep_send(s, pkt, sizeof(pkt)); } break; } return err; } static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb) { struct bnep_ext_hdr *h; int err = 0; do { h = (void *) skb->data; if (!skb_pull(skb, sizeof(*h))) { err = -EILSEQ; break; } BT_DBG("type 0x%x len %u", h->type, h->len); switch (h->type & BNEP_TYPE_MASK) { case BNEP_EXT_CONTROL: bnep_rx_control(s, skb->data, skb->len); break; default: /* Unknown extension, skip it. 
*/ break; } if (!skb_pull(skb, h->len)) { err = -EILSEQ; break; } } while (!err && (h->type & BNEP_EXT_HEADER)); return err; } static u8 __bnep_rx_hlen[] = { ETH_HLEN, /* BNEP_GENERAL */ 0, /* BNEP_CONTROL */ 2, /* BNEP_COMPRESSED */ ETH_ALEN + 2, /* BNEP_COMPRESSED_SRC_ONLY */ ETH_ALEN + 2 /* BNEP_COMPRESSED_DST_ONLY */ }; static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) { struct net_device *dev = s->dev; struct sk_buff *nskb; u8 type, ctrl_type; dev->stats.rx_bytes += skb->len; type = *(u8 *) skb->data; skb_pull(skb, 1); ctrl_type = *(u8 *)skb->data; if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen)) goto badframe; if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) { if (bnep_rx_control(s, skb->data, skb->len) < 0) { dev->stats.tx_errors++; kfree_skb(skb); return 0; } if (!(type & BNEP_EXT_HEADER)) { kfree_skb(skb); return 0; } /* Verify and pull ctrl message since it's already processed */ switch (ctrl_type) { case BNEP_SETUP_CONN_REQ: /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */ if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2)) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: case BNEP_FILTER_NET_TYPE_SET: /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) goto badframe; break; default: kfree_skb(skb); return 0; } } else { skb_reset_mac_header(skb); /* Verify and pull out header */ if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) goto badframe; s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); } if (type & BNEP_EXT_HEADER) { if (bnep_rx_extension(s, skb) < 0) goto badframe; } /* Strip 802.1p header */ if (ntohs(s->eh.h_proto) == ETH_P_8021Q) { if (!skb_pull(skb, 4)) goto badframe; s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); } /* We have to alloc new skb and copy data here :(. Because original skb * may not be modified and because of the alignment requirements. 
*/ nskb = alloc_skb(2 + ETH_HLEN + skb->len, GFP_KERNEL); if (!nskb) { dev->stats.rx_dropped++; kfree_skb(skb); return -ENOMEM; } skb_reserve(nskb, 2); /* Decompress header and construct ether frame */ switch (type & BNEP_TYPE_MASK) { case BNEP_COMPRESSED: __skb_put_data(nskb, &s->eh, ETH_HLEN); break; case BNEP_COMPRESSED_SRC_ONLY: __skb_put_data(nskb, s->eh.h_dest, ETH_ALEN); __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN); put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; case BNEP_COMPRESSED_DST_ONLY: __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN); __skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2); break; case BNEP_GENERAL: __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN * 2); put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; } skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len); kfree_skb(skb); dev->stats.rx_packets++; nskb->ip_summed = CHECKSUM_NONE; nskb->protocol = eth_type_trans(nskb, dev); netif_rx(nskb); return 0; badframe: dev->stats.rx_errors++; kfree_skb(skb); return 0; } static u8 __bnep_tx_types[] = { BNEP_GENERAL, BNEP_COMPRESSED_SRC_ONLY, BNEP_COMPRESSED_DST_ONLY, BNEP_COMPRESSED }; static int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb) { struct ethhdr *eh = (void *) skb->data; struct socket *sock = s->sock; struct kvec iv[3]; int len = 0, il = 0; u8 type = 0; BT_DBG("skb %p dev %p type %u", skb, skb->dev, skb->pkt_type); if (!skb->dev) { /* Control frame sent by us */ goto send; } iv[il++] = (struct kvec) { &type, 1 }; len++; if (compress_src && ether_addr_equal(eh->h_dest, s->eh.h_source)) type |= 0x01; if (compress_dst && ether_addr_equal(eh->h_source, s->eh.h_dest)) type |= 0x02; if (type) skb_pull(skb, ETH_ALEN * 2); type = __bnep_tx_types[type]; switch (type) { case BNEP_COMPRESSED_SRC_ONLY: iv[il++] = (struct kvec) { eh->h_source, ETH_ALEN }; len += ETH_ALEN; break; case BNEP_COMPRESSED_DST_ONLY: iv[il++] = (struct kvec) { eh->h_dest, ETH_ALEN }; len += ETH_ALEN; break; } send: iv[il++] = (struct kvec) { skb->data, skb->len }; len += skb->len; /* FIXME: linearize skb */ { len = kernel_sendmsg(sock, &s->msg, iv, il, len); } kfree_skb(skb); if (len > 0) { s->dev->stats.tx_bytes += len; s->dev->stats.tx_packets++; return 0; } return len; } static int bnep_session(void *arg) { struct bnep_session *s = arg; struct net_device *dev = s->dev; struct sock *sk = s->sock->sk; struct sk_buff *skb; DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG(""); set_user_nice(current, -15); add_wait_queue(sk_sleep(sk), &wait); while (1) { if (atomic_read(&s->terminate)) break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) bnep_rx_frame(s, skb); else kfree_skb(skb); } if (sk->sk_state != BT_CONNECTED) break; /* TX */ while ((skb = skb_dequeue(&sk->sk_write_queue))) if (bnep_tx_frame(s, skb)) break; netif_wake_queue(dev); /* * wait_woken() performs the necessary memory barriers * for us; see the header comment for this primitive. 
*/ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); /* Delete network device */ unregister_netdev(dev); /* Wakeup user-space polling for socket errors */ s->sock->sk->sk_err = EUNATCH; wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); __bnep_unlink_session(s); up_write(&bnep_session_sem); free_netdev(dev); module_put_and_kthread_exit(0); return 0; } static struct device *bnep_get_device(struct bnep_session *session) { struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; if (!conn || !conn->hcon) return NULL; return &conn->hcon->dev; } static struct device_type bnep_type = { .name = "bluetooth", }; int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) { u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); struct net_device *dev; struct bnep_session *s, *ss; u8 dst[ETH_ALEN], src[ETH_ALEN]; int err; BT_DBG(""); if (!l2cap_is_socket(sock)) return -EBADFD; if (req->flags & ~valid_flags) return -EINVAL; baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst); baswap((void *) src, &l2cap_pi(sock->sk)->chan->src); /* session struct allocated as private part of net_device */ dev = alloc_netdev(sizeof(struct bnep_session), (*req->device) ? req->device : "bnep%d", NET_NAME_UNKNOWN, bnep_net_setup); if (!dev) return -ENOMEM; down_write(&bnep_session_sem); ss = __bnep_get_session(dst); if (ss && ss->state == BT_CONNECTED) { err = -EEXIST; goto failed; } s = netdev_priv(dev); /* This is rx header therefore addresses are swapped. * ie. eh.h_dest is our local address. */ memcpy(s->eh.h_dest, &src, ETH_ALEN); memcpy(s->eh.h_source, &dst, ETH_ALEN); eth_hw_addr_set(dev, s->eh.h_dest); s->dev = dev; s->sock = sock; s->role = req->role; s->state = BT_CONNECTED; s->flags = req->flags; s->msg.msg_flags = MSG_NOSIGNAL; #ifdef CONFIG_BT_BNEP_MC_FILTER /* Set default mc filter to not filter out any mc addresses * as defined in the BNEP specification (revision 0.95a) * http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf */ s->mc_filter = ~0LL; #endif #ifdef CONFIG_BT_BNEP_PROTO_FILTER /* Set default protocol filter */ bnep_set_default_proto_filter(s); #endif SET_NETDEV_DEV(dev, bnep_get_device(s)); SET_NETDEV_DEVTYPE(dev, &bnep_type); err = register_netdev(dev); if (err) goto failed; __bnep_link_session(s); __module_get(THIS_MODULE); s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name); if (IS_ERR(s->task)) { /* Session thread start failed, gotta cleanup. 
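* Undo the setup done above: drop the module reference taken for the session thread, unregister the net device and unlink the session before returning the kthread_run() error to the caller.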
*/ module_put(THIS_MODULE); unregister_netdev(dev); __bnep_unlink_session(s); err = PTR_ERR(s->task); goto failed; } up_write(&bnep_session_sem); strcpy(req->device, dev->name); return 0; failed: up_write(&bnep_session_sem); free_netdev(dev); return err; } int bnep_del_connection(struct bnep_conndel_req *req) { u32 valid_flags = 0; struct bnep_session *s; int err = 0; BT_DBG(""); if (req->flags & ~valid_flags) return -EINVAL; down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); if (s) { atomic_inc(&s->terminate); wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; up_read(&bnep_session_sem); return err; } static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s) { u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); memset(ci, 0, sizeof(*ci)); memcpy(ci->dst, s->eh.h_source, ETH_ALEN); strcpy(ci->device, s->dev->name); ci->flags = s->flags & valid_flags; ci->state = s->state; ci->role = s->role; } int bnep_get_connlist(struct bnep_connlist_req *req) { struct bnep_session *s; int err = 0, n = 0; down_read(&bnep_session_sem); list_for_each_entry(s, &bnep_session_list, list) { struct bnep_conninfo ci; __bnep_copy_ci(&ci, s); if (copy_to_user(req->ci, &ci, sizeof(ci))) { err = -EFAULT; break; } if (++n >= req->cnum) break; req->ci++; } req->cnum = n; up_read(&bnep_session_sem); return err; } int bnep_get_conninfo(struct bnep_conninfo *ci) { struct bnep_session *s; int err = 0; down_read(&bnep_session_sem); s = __bnep_get_session(ci->dst); if (s) __bnep_copy_ci(ci, s); else err = -ENOENT; up_read(&bnep_session_sem); return err; } static int __init bnep_init(void) { char flt[50] = ""; #ifdef CONFIG_BT_BNEP_PROTO_FILTER strcat(flt, "protocol "); #endif #ifdef CONFIG_BT_BNEP_MC_FILTER strcat(flt, "multicast"); #endif BT_INFO("BNEP (Ethernet Emulation) ver %s", VERSION); if (flt[0]) BT_INFO("BNEP filters: %s", flt); bnep_sock_init(); return 0; } static void __exit bnep_exit(void) { bnep_sock_cleanup(); } module_init(bnep_init); module_exit(bnep_exit); module_param(compress_src, bool, 0644); MODULE_PARM_DESC(compress_src, "Compress source headers"); module_param(compress_dst, bool, 0644); MODULE_PARM_DESC(compress_dst, "Compress destination headers"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("bt-proto-4");
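/* Usage note (illustrative, assuming the module is built as bnep.ko): header compression can be tuned at load time, e.g. "modprobe bnep compress_src=0", or at run time via /sys/module/bnep/parameters/compress_src and compress_dst, since both parameters are registered with mode 0644 above. */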
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/blkdev.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> #include <linux/parser.h> #include <linux/ctype.h> #include <linux/namei.h> #include <linux/miscdevice.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/crc32c.h> #include <linux/btrfs.h> #include <linux/security.h> #include "messages.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "print-tree.h" #include "props.h" #include "xattr.h" #include "bio.h" #include "export.h" #include "compression.h" #include "rcu-string.h" #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" #include "space-info.h" #include "sysfs.h" #include "zoned.h" #include "tests/btrfs-tests.h" #include "block-group.h" #include "discard.h" #include "qgroup.h" #include "raid56.h" #include "fs.h" #include "accessors.h" #include "defrag.h" #include "dir-item.h" #include "ioctl.h" #include "scrub.h" #include "verity.h" #include "super.h" #include "extent-tree.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> static const struct super_operations btrfs_super_ops; /* * Types for mounting the default subvolume and a subvolume explicitly * requested by subvol=/path. That way the callchain is straightforward and we * don't have to play tricks with the mount options and recursive calls to * btrfs_mount. * * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
*/ static struct file_system_type btrfs_fs_type; static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); static void btrfs_put_super(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); close_ctree(fs_info); } enum { Opt_acl, Opt_noacl, Opt_clear_cache, Opt_commit_interval, Opt_compress, Opt_compress_force, Opt_compress_force_type, Opt_compress_type, Opt_degraded, Opt_device, Opt_fatal_errors, Opt_flushoncommit, Opt_noflushoncommit, Opt_max_inline, Opt_barrier, Opt_nobarrier, Opt_datacow, Opt_nodatacow, Opt_datasum, Opt_nodatasum, Opt_defrag, Opt_nodefrag, Opt_discard, Opt_nodiscard, Opt_discard_mode, Opt_norecovery, Opt_ratio, Opt_rescan_uuid_tree, Opt_skip_balance, Opt_space_cache, Opt_no_space_cache, Opt_space_cache_version, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_nossd_spread, Opt_subvol, Opt_subvol_empty, Opt_subvolid, Opt_thread_pool, Opt_treelog, Opt_notreelog, Opt_user_subvol_rm_allowed, /* Rescue options */ Opt_rescue, Opt_usebackuproot, Opt_nologreplay, Opt_ignorebadroots, Opt_ignoredatacsums, Opt_rescue_all, /* Deprecated options */ Opt_recovery, Opt_inode_cache, Opt_noinode_cache, /* Debugging options */ Opt_enospc_debug, Opt_noenospc_debug, #ifdef CONFIG_BTRFS_DEBUG Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY Opt_ref_verify, #endif Opt_err, }; static const match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_clear_cache, "clear_cache"}, {Opt_commit_interval, "commit=%u"}, {Opt_compress, "compress"}, {Opt_compress_type, "compress=%s"}, {Opt_compress_force, "compress-force"}, {Opt_compress_force_type, "compress-force=%s"}, {Opt_degraded, "degraded"}, {Opt_device, "device=%s"}, {Opt_fatal_errors, "fatal_errors=%s"}, {Opt_flushoncommit, "flushoncommit"}, {Opt_noflushoncommit, "noflushoncommit"}, {Opt_inode_cache, "inode_cache"}, {Opt_noinode_cache, "noinode_cache"}, {Opt_max_inline, "max_inline=%s"}, {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_datacow, "datacow"}, {Opt_nodatacow, "nodatacow"}, {Opt_datasum, "datasum"}, {Opt_nodatasum, "nodatasum"}, {Opt_defrag, "autodefrag"}, {Opt_nodefrag, "noautodefrag"}, {Opt_discard, "discard"}, {Opt_discard_mode, "discard=%s"}, {Opt_nodiscard, "nodiscard"}, {Opt_norecovery, "norecovery"}, {Opt_ratio, "metadata_ratio=%u"}, {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, {Opt_skip_balance, "skip_balance"}, {Opt_space_cache, "space_cache"}, {Opt_no_space_cache, "nospace_cache"}, {Opt_space_cache_version, "space_cache=%s"}, {Opt_ssd, "ssd"}, {Opt_nossd, "nossd"}, {Opt_ssd_spread, "ssd_spread"}, {Opt_nossd_spread, "nossd_spread"}, {Opt_subvol, "subvol=%s"}, {Opt_subvol_empty, "subvol="}, {Opt_subvolid, "subvolid=%s"}, {Opt_thread_pool, "thread_pool=%u"}, {Opt_treelog, "treelog"}, {Opt_notreelog, "notreelog"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, /* Rescue options */ {Opt_rescue, "rescue=%s"}, /* Deprecated, with alias rescue=nologreplay */ {Opt_nologreplay, "nologreplay"}, /* Deprecated, with alias rescue=usebackuproot */ {Opt_usebackuproot, "usebackuproot"}, /* Deprecated options */ {Opt_recovery, "recovery"}, /* Debugging options */ {Opt_enospc_debug, "enospc_debug"}, {Opt_noenospc_debug, "noenospc_debug"}, #ifdef CONFIG_BTRFS_DEBUG {Opt_fragment_data, "fragment=data"}, {Opt_fragment_metadata, "fragment=metadata"}, {Opt_fragment_all, "fragment=all"}, #endif #ifdef 
CONFIG_BTRFS_FS_REF_VERIFY {Opt_ref_verify, "ref_verify"}, #endif {Opt_err, NULL}, }; static const match_table_t rescue_tokens = { {Opt_usebackuproot, "usebackuproot"}, {Opt_nologreplay, "nologreplay"}, {Opt_ignorebadroots, "ignorebadroots"}, {Opt_ignorebadroots, "ibadroots"}, {Opt_ignoredatacsums, "ignoredatacsums"}, {Opt_ignoredatacsums, "idatacsums"}, {Opt_rescue_all, "all"}, {Opt_err, NULL}, }; static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt, const char *opt_name) { if (fs_info->mount_opt & opt) { btrfs_err(fs_info, "%s must be used with ro mount option", opt_name); return true; } return false; } static int parse_rescue_options(struct btrfs_fs_info *info, const char *options) { char *opts; char *orig; char *p; substring_t args[MAX_OPT_ARGS]; int ret = 0; opts = kstrdup(options, GFP_KERNEL); if (!opts) return -ENOMEM; orig = opts; while ((p = strsep(&opts, ":")) != NULL) { int token; if (!*p) continue; token = match_token(p, rescue_tokens, args); switch (token){ case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); btrfs_set_opt(info->mount_opt, USEBACKUPROOT); break; case Opt_nologreplay: btrfs_set_and_info(info, NOLOGREPLAY, "disabling log replay at mount time"); break; case Opt_ignorebadroots: btrfs_set_and_info(info, IGNOREBADROOTS, "ignoring bad roots"); break; case Opt_ignoredatacsums: btrfs_set_and_info(info, IGNOREDATACSUMS, "ignoring data csums"); break; case Opt_rescue_all: btrfs_info(info, "enabling all of the rescue options"); btrfs_set_and_info(info, IGNOREDATACSUMS, "ignoring data csums"); btrfs_set_and_info(info, IGNOREBADROOTS, "ignoring bad roots"); btrfs_set_and_info(info, NOLOGREPLAY, "disabling log replay at mount time"); break; case Opt_err: btrfs_info(info, "unrecognized rescue option '%s'", p); ret = -EINVAL; goto out; default: break; } } out: kfree(orig); return ret; } /* * Regular mount options parser. Everything that is needed only when * reading in a new superblock is parsed here. * XXX JDM: This needs to be cleaned up for remount. */ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, unsigned long new_flags) { substring_t args[MAX_OPT_ARGS]; char *p, *num; int intarg; int ret = 0; char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; int saved_compress_level; bool saved_compress_force; int no_compress = 0; const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state); if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); else if (btrfs_free_space_cache_v1_active(info)) { if (btrfs_is_zoned(info)) { btrfs_info(info, "zoned: clearing existing space cache"); btrfs_set_super_cache_generation(info->super_copy, 0); } else { btrfs_set_opt(info->mount_opt, SPACE_CACHE); } } /* * Even the options are empty, we still need to do extra check * against new flags */ if (!options) goto check; while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_degraded: btrfs_info(info, "allowing degraded mounts"); btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: case Opt_subvol_empty: case Opt_subvolid: case Opt_device: /* * These are parsed by btrfs_parse_subvol_options or * btrfs_parse_device_options and can be ignored here. 
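* For example, in "subvol=/snap/a,device=/dev/sdb,compress=zstd:3" (path and device given purely for illustration) only the compress token is acted on in this loop; the subvol and device values were already consumed by those earlier parsers.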
*/ break; case Opt_nodatasum: btrfs_set_and_info(info, NODATASUM, "setting nodatasum"); break; case Opt_datasum: if (btrfs_test_opt(info, NODATASUM)) { if (btrfs_test_opt(info, NODATACOW)) btrfs_info(info, "setting datasum, datacow enabled"); else btrfs_info(info, "setting datasum"); } btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); break; case Opt_nodatacow: if (!btrfs_test_opt(info, NODATACOW)) { if (!btrfs_test_opt(info, COMPRESS) || !btrfs_test_opt(info, FORCE_COMPRESS)) { btrfs_info(info, "setting nodatacow, compression disabled"); } else { btrfs_info(info, "setting nodatacow"); } } btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); btrfs_set_opt(info->mount_opt, NODATACOW); btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_datacow: btrfs_clear_and_info(info, NODATACOW, "setting datacow"); break; case Opt_compress_force: case Opt_compress_force_type: compress_force = true; fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, COMPRESS) ? info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { compress_type = "zlib"; info->compress_type = BTRFS_COMPRESS_ZLIB; info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; /* * args[0] contains uninitialized data since * for these tokens we don't expect any * parameter. */ if (token != Opt_compress && token != Opt_compress_force) info->compress_level = btrfs_compress_str2level( BTRFS_COMPRESS_ZLIB, args[0].from + 4); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); no_compress = 0; } else if (strncmp(args[0].from, "lzo", 3) == 0) { compress_type = "lzo"; info->compress_type = BTRFS_COMPRESS_LZO; info->compress_level = 0; btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_LZO); no_compress = 0; } else if (strncmp(args[0].from, "zstd", 4) == 0) { compress_type = "zstd"; info->compress_type = BTRFS_COMPRESS_ZSTD; info->compress_level = btrfs_compress_str2level( BTRFS_COMPRESS_ZSTD, args[0].from + 4); btrfs_set_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_ZSTD); no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; info->compress_level = 0; info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; no_compress++; } else { btrfs_err(info, "unrecognized compression value %s", args[0].from); ret = -EINVAL; goto out; } if (compress_force) { btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); } else { /* * If we remount from compress-force=xxx to * compress=xxx, we need clear FORCE_COMPRESS * flag, otherwise, there is no way for users * to disable forcible compression separately. */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } if (no_compress == 1) { btrfs_info(info, "use no compression"); } else if ((info->compress_type != saved_compress_type) || (compress_force != saved_compress_force) || (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? 
"force" : "use", compress_type, info->compress_level); } compress_force = false; break; case Opt_ssd: btrfs_set_and_info(info, SSD, "enabling ssd optimizations"); btrfs_clear_opt(info->mount_opt, NOSSD); break; case Opt_ssd_spread: btrfs_set_and_info(info, SSD, "enabling ssd optimizations"); btrfs_set_and_info(info, SSD_SPREAD, "using spread ssd allocation scheme"); btrfs_clear_opt(info->mount_opt, NOSSD); break; case Opt_nossd: btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); break; case Opt_barrier: btrfs_clear_and_info(info, NOBARRIER, "turning on barriers"); break; case Opt_nobarrier: btrfs_set_and_info(info, NOBARRIER, "turning off barriers"); break; case Opt_thread_pool: ret = match_int(&args[0], &intarg); if (ret) { btrfs_err(info, "unrecognized thread_pool value %s", args[0].from); goto out; } else if (intarg == 0) { btrfs_err(info, "invalid value 0 for thread_pool"); ret = -EINVAL; goto out; } info->thread_pool_size = intarg; break; case Opt_max_inline: num = match_strdup(&args[0]); if (num) { info->max_inline = memparse(num, NULL); kfree(num); if (info->max_inline) { info->max_inline = min_t(u64, info->max_inline, info->sectorsize); } btrfs_info(info, "max_inline at %llu", info->max_inline); } else { ret = -ENOMEM; goto out; } break; case Opt_acl: #ifdef CONFIG_BTRFS_FS_POSIX_ACL info->sb->s_flags |= SB_POSIXACL; break; #else btrfs_err(info, "support for ACL not compiled in!"); ret = -EINVAL; goto out; #endif case Opt_noacl: info->sb->s_flags &= ~SB_POSIXACL; break; case Opt_notreelog: btrfs_set_and_info(info, NOTREELOG, "disabling tree log"); break; case Opt_treelog: btrfs_clear_and_info(info, NOTREELOG, "enabling tree log"); break; case Opt_norecovery: case Opt_nologreplay: btrfs_warn(info, "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); btrfs_set_and_info(info, NOLOGREPLAY, "disabling log replay at mount time"); break; case Opt_flushoncommit: btrfs_set_and_info(info, FLUSHONCOMMIT, "turning on flush-on-commit"); break; case Opt_noflushoncommit: btrfs_clear_and_info(info, FLUSHONCOMMIT, "turning off flush-on-commit"); break; case Opt_ratio: ret = match_int(&args[0], &intarg); if (ret) { btrfs_err(info, "unrecognized metadata_ratio value %s", args[0].from); goto out; } info->metadata_ratio = intarg; btrfs_info(info, "metadata ratio %u", info->metadata_ratio); break; case Opt_discard: case Opt_discard_mode: if (token == Opt_discard || strcmp(args[0].from, "sync") == 0) { btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC); btrfs_set_and_info(info, DISCARD_SYNC, "turning on sync discard"); } else if (strcmp(args[0].from, "async") == 0) { btrfs_clear_opt(info->mount_opt, DISCARD_SYNC); btrfs_set_and_info(info, DISCARD_ASYNC, "turning on async discard"); } else { btrfs_err(info, "unrecognized discard mode value %s", args[0].from); ret = -EINVAL; goto out; } btrfs_clear_opt(info->mount_opt, NODISCARD); break; case Opt_nodiscard: btrfs_clear_and_info(info, DISCARD_SYNC, "turning off discard"); btrfs_clear_and_info(info, DISCARD_ASYNC, "turning off async discard"); btrfs_set_opt(info->mount_opt, NODISCARD); break; case Opt_space_cache: case Opt_space_cache_version: /* * We already set FREE_SPACE_TREE above because we have * compat_ro(FREE_SPACE_TREE) set, and we aren't going * to allow v1 to be set for extent tree v2, simply * ignore this setting if we're extent tree v2. 
*/ if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) break; if (token == Opt_space_cache || strcmp(args[0].from, "v1") == 0) { btrfs_clear_opt(info->mount_opt, FREE_SPACE_TREE); btrfs_set_and_info(info, SPACE_CACHE, "enabling disk space caching"); } else if (strcmp(args[0].from, "v2") == 0) { btrfs_clear_opt(info->mount_opt, SPACE_CACHE); btrfs_set_and_info(info, FREE_SPACE_TREE, "enabling free space tree"); } else { btrfs_err(info, "unrecognized space_cache value %s", args[0].from); ret = -EINVAL; goto out; } break; case Opt_rescan_uuid_tree: btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); break; case Opt_no_space_cache: /* * We cannot operate without the free space tree with * extent tree v2, ignore this option. */ if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) break; if (btrfs_test_opt(info, SPACE_CACHE)) { btrfs_clear_and_info(info, SPACE_CACHE, "disabling disk space caching"); } if (btrfs_test_opt(info, FREE_SPACE_TREE)) { btrfs_clear_and_info(info, FREE_SPACE_TREE, "disabling free space tree"); } break; case Opt_inode_cache: case Opt_noinode_cache: btrfs_warn(info, "the 'inode_cache' option is deprecated and has no effect since 5.11"); break; case Opt_clear_cache: /* * We cannot clear the free space tree with extent tree * v2, ignore this option. */ if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) break; btrfs_set_and_info(info, CLEAR_CACHE, "force clearing of disk cache"); break; case Opt_user_subvol_rm_allowed: btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); break; case Opt_enospc_debug: btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); break; case Opt_noenospc_debug: btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG); break; case Opt_defrag: btrfs_set_and_info(info, AUTO_DEFRAG, "enabling auto defrag"); break; case Opt_nodefrag: btrfs_clear_and_info(info, AUTO_DEFRAG, "disabling auto defrag"); break; case Opt_recovery: case Opt_usebackuproot: btrfs_warn(info, "'%s' is deprecated, use 'rescue=usebackuproot' instead", token == Opt_recovery ? 
"recovery" : "usebackuproot"); btrfs_info(info, "trying to use backup root at mount time"); btrfs_set_opt(info->mount_opt, USEBACKUPROOT); break; case Opt_skip_balance: btrfs_set_opt(info->mount_opt, SKIP_BALANCE); break; case Opt_fatal_errors: if (strcmp(args[0].from, "panic") == 0) { btrfs_set_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); } else if (strcmp(args[0].from, "bug") == 0) { btrfs_clear_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); } else { btrfs_err(info, "unrecognized fatal_errors value %s", args[0].from); ret = -EINVAL; goto out; } break; case Opt_commit_interval: intarg = 0; ret = match_int(&args[0], &intarg); if (ret) { btrfs_err(info, "unrecognized commit_interval value %s", args[0].from); ret = -EINVAL; goto out; } if (intarg == 0) { btrfs_info(info, "using default commit interval %us", BTRFS_DEFAULT_COMMIT_INTERVAL); intarg = BTRFS_DEFAULT_COMMIT_INTERVAL; } else if (intarg > 300) { btrfs_warn(info, "excessive commit interval %d", intarg); } info->commit_interval = intarg; break; case Opt_rescue: ret = parse_rescue_options(info, args[0].from); if (ret < 0) { btrfs_err(info, "unrecognized rescue value %s", args[0].from); goto out; } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment_all: btrfs_info(info, "fragmenting all space"); btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_metadata: btrfs_info(info, "fragmenting metadata"); btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_data: btrfs_info(info, "fragmenting data"); btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); break; #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY case Opt_ref_verify: btrfs_info(info, "doing ref verification"); btrfs_set_opt(info->mount_opt, REF_VERIFY); break; #endif case Opt_err: btrfs_err(info, "unrecognized mount option '%s'", p); ret = -EINVAL; goto out; default: break; } } check: /* We're read-only, don't have to check. */ if (new_flags & SB_RDONLY) goto out; if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums")) ret = -EINVAL; out: if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && !btrfs_test_opt(info, FREE_SPACE_TREE) && !btrfs_test_opt(info, CLEAR_CACHE)) { btrfs_err(info, "cannot disable free space tree"); ret = -EINVAL; } if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) && !btrfs_test_opt(info, FREE_SPACE_TREE)) { btrfs_err(info, "cannot disable free space tree with block-group-tree feature"); ret = -EINVAL; } if (!ret) ret = btrfs_check_mountopts_zoned(info); if (!ret && !remounting) { if (btrfs_test_opt(info, SPACE_CACHE)) btrfs_info(info, "disk space caching is enabled"); if (btrfs_test_opt(info, FREE_SPACE_TREE)) btrfs_info(info, "using free space tree"); } return ret; } /* * Parse mount options that are required early in the mount process. * * All other options will be parsed on much later in the mount process and * only when we need to allocate a new super block. 
*/ static int btrfs_parse_device_options(const char *options, blk_mode_t flags) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; struct btrfs_device *device = NULL; int error = 0; lockdep_assert_held(&uuid_mutex); if (!options) return 0; /* * strsep changes the string, duplicate it because btrfs_parse_options * gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) return -ENOMEM; orig = opts; while ((p = strsep(&opts, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); if (token == Opt_device) { device_name = match_strdup(&args[0]); if (!device_name) { error = -ENOMEM; goto out; } device = btrfs_scan_one_device(device_name, flags, false); kfree(device_name); if (IS_ERR(device)) { error = PTR_ERR(device); goto out; } } } out: kfree(orig); return error; } /* * Parse mount options that are related to subvolume id * * The value is later passed to mount_subvol() */ static int btrfs_parse_subvol_options(const char *options, char **subvol_name, u64 *subvol_objectid) { substring_t args[MAX_OPT_ARGS]; char *opts, *orig, *p; int error = 0; u64 subvolid; if (!options) return 0; /* * strsep changes the string, duplicate it because * btrfs_parse_device_options gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) return -ENOMEM; orig = opts; while ((p = strsep(&opts, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_subvol: kfree(*subvol_name); *subvol_name = match_strdup(&args[0]); if (!*subvol_name) { error = -ENOMEM; goto out; } break; case Opt_subvolid: error = match_u64(&args[0], &subvolid); if (error) goto out; /* we want the original fs_tree */ if (subvolid == 0) subvolid = BTRFS_FS_TREE_OBJECTID; *subvol_objectid = subvolid; break; default: break; } } out: kfree(orig); return error; } char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, u64 subvol_objectid) { struct btrfs_root *root = fs_info->tree_root; struct btrfs_root *fs_root = NULL; struct btrfs_root_ref *root_ref; struct btrfs_inode_ref *inode_ref; struct btrfs_key key; struct btrfs_path *path = NULL; char *name = NULL, *ptr; u64 dirid; int len; int ret; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto err; } name = kmalloc(PATH_MAX, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto err; } ptr = name + PATH_MAX - 1; ptr[0] = '\0'; /* * Walk up the subvolume trees in the tree of tree roots by root * backrefs until we hit the top-level subvolume. */ while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { key.objectid = subvol_objectid; key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = (u64)-1; ret = btrfs_search_backwards(root, &key, path); if (ret < 0) { goto err; } else if (ret > 0) { ret = -ENOENT; goto err; } subvol_objectid = key.offset; root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_root_ref); len = btrfs_root_ref_name_len(path->nodes[0], root_ref); ptr -= len + 1; if (ptr < name) { ret = -ENAMETOOLONG; goto err; } read_extent_buffer(path->nodes[0], ptr + 1, (unsigned long)(root_ref + 1), len); ptr[0] = '/'; dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); btrfs_release_path(path); fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); if (IS_ERR(fs_root)) { ret = PTR_ERR(fs_root); fs_root = NULL; goto err; } /* * Walk up the filesystem tree by inode refs until we hit the * root directory. 
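* For example (names purely illustrative): for a subvolume reachable at "/snapshots/daily", the root backref lookup above yields the component "daily" and the inode ref walk below yields "snapshots"; each component is prefixed with '/', so the path is built right to left in the PATH_MAX buffer and moved to its front before it is returned.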
*/ while (dirid != BTRFS_FIRST_FREE_OBJECTID) { key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; ret = btrfs_search_backwards(fs_root, &key, path); if (ret < 0) { goto err; } else if (ret > 0) { ret = -ENOENT; goto err; } dirid = key.offset; inode_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(path->nodes[0], inode_ref); ptr -= len + 1; if (ptr < name) { ret = -ENAMETOOLONG; goto err; } read_extent_buffer(path->nodes[0], ptr + 1, (unsigned long)(inode_ref + 1), len); ptr[0] = '/'; btrfs_release_path(path); } btrfs_put_root(fs_root); fs_root = NULL; } btrfs_free_path(path); if (ptr == name + PATH_MAX - 1) { name[0] = '/'; name[1] = '\0'; } else { memmove(name, ptr, name + PATH_MAX - ptr); } return name; err: btrfs_put_root(fs_root); btrfs_free_path(path); kfree(name); return ERR_PTR(ret); } static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) { struct btrfs_root *root = fs_info->tree_root; struct btrfs_dir_item *di; struct btrfs_path *path; struct btrfs_key location; struct fscrypt_str name = FSTR_INIT("default", 7); u64 dir_id; path = btrfs_alloc_path(); if (!path) return -ENOMEM; /* * Find the "default" dir item which points to the root item that we * will mount by default if we haven't been given a specific subvolume * to mount. */ dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); if (IS_ERR(di)) { btrfs_free_path(path); return PTR_ERR(di); } if (!di) { /* * Ok the default dir item isn't there. This is weird since * it's always been there, but don't freak out, just try and * mount the top-level subvolume. */ btrfs_free_path(path); *objectid = BTRFS_FS_TREE_OBJECTID; return 0; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); btrfs_free_path(path); *objectid = location.objectid; return 0; } static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices, void *data) { struct inode *inode; struct btrfs_fs_info *fs_info = btrfs_sb(sb); int err; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_d_op = &btrfs_dentry_operations; sb->s_export_op = &btrfs_export_ops; #ifdef CONFIG_FS_VERITY sb->s_vop = &btrfs_verityops; #endif sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; #ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; err = super_setup_bdi(sb); if (err) { btrfs_err(fs_info, "super_setup_bdi failed"); return err; } err = open_ctree(sb, fs_devices, (char *)data); if (err) { btrfs_err(fs_info, "open_ctree failed"); return err; } inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { err = PTR_ERR(inode); btrfs_handle_fs_error(fs_info, err, NULL); goto fail_close; } sb->s_root = d_make_root(inode); if (!sb->s_root) { err = -ENOMEM; goto fail_close; } sb->s_flags |= SB_ACTIVE; return 0; fail_close: close_ctree(fs_info); return err; } int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; trace_btrfs_sync_fs(fs_info, wait); if (!wait) { filemap_flush(fs_info->btree_inode->i_mapping); return 0; } btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ if (PTR_ERR(trans) == -ENOENT) { 
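/* -ENOENT from btrfs_attach_transaction_barrier() simply means there is no transaction currently running to attach to. */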
/* * Exit unless we have some pending changes * that need to go through commit */ if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags)) return 0; /* * A non-blocking test if the fs is frozen. We must not * start a new transaction here otherwise a deadlock * happens. The pending operations are delayed to the * next commit after thawing. */ if (sb_start_write_trylock(sb)) sb_end_write(sb); else return 0; trans = btrfs_start_transaction(root, 0); } if (IS_ERR(trans)) return PTR_ERR(trans); } return btrfs_commit_transaction(trans); } static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed) { seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s); *printed = true; } static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; const char *subvol_name; bool printed = false; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); if (btrfs_test_opt(info, NODATASUM)) seq_puts(seq, ",nodatasum"); if (btrfs_test_opt(info, NODATACOW)) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(info, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%u", info->thread_pool_size); if (btrfs_test_opt(info, COMPRESS)) { compress_type = btrfs_compress_type2str(info->compress_type); if (btrfs_test_opt(info, FORCE_COMPRESS)) seq_printf(seq, ",compress-force=%s", compress_type); else seq_printf(seq, ",compress=%s", compress_type); if (info->compress_level) seq_printf(seq, ":%d", info->compress_level); } if (btrfs_test_opt(info, NOSSD)) seq_puts(seq, ",nossd"); if (btrfs_test_opt(info, SSD_SPREAD)) seq_puts(seq, ",ssd_spread"); else if (btrfs_test_opt(info, SSD)) seq_puts(seq, ",ssd"); if (btrfs_test_opt(info, NOTREELOG)) seq_puts(seq, ",notreelog"); if (btrfs_test_opt(info, NOLOGREPLAY)) print_rescue_option(seq, "nologreplay", &printed); if (btrfs_test_opt(info, USEBACKUPROOT)) print_rescue_option(seq, "usebackuproot", &printed); if (btrfs_test_opt(info, IGNOREBADROOTS)) print_rescue_option(seq, "ignorebadroots", &printed); if (btrfs_test_opt(info, IGNOREDATACSUMS)) print_rescue_option(seq, "ignoredatacsums", &printed); if (btrfs_test_opt(info, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(info, DISCARD_SYNC)) seq_puts(seq, ",discard"); if (btrfs_test_opt(info, DISCARD_ASYNC)) seq_puts(seq, ",discard=async"); if (!(info->sb->s_flags & SB_POSIXACL)) seq_puts(seq, ",noacl"); if (btrfs_free_space_cache_v1_active(info)) seq_puts(seq, ",space_cache"); else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) seq_puts(seq, ",space_cache=v2"); else seq_puts(seq, ",nospace_cache"); if (btrfs_test_opt(info, RESCAN_UUID_TREE)) seq_puts(seq, ",rescan_uuid_tree"); if (btrfs_test_opt(info, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); if (btrfs_test_opt(info, ENOSPC_DEBUG)) seq_puts(seq, ",enospc_debug"); if (btrfs_test_opt(info, AUTO_DEFRAG)) seq_puts(seq, ",autodefrag"); if (btrfs_test_opt(info, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); if (info->metadata_ratio) seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio); if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) seq_printf(seq, 
",commit=%u", info->commit_interval); #ifdef CONFIG_BTRFS_DEBUG if (btrfs_test_opt(info, FRAGMENT_DATA)) seq_puts(seq, ",fragment=data"); if (btrfs_test_opt(info, FRAGMENT_METADATA)) seq_puts(seq, ",fragment=metadata"); #endif if (btrfs_test_opt(info, REF_VERIFY)) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); subvol_name = btrfs_get_subvol_name_from_objectid(info, BTRFS_I(d_inode(dentry))->root->root_key.objectid); if (!IS_ERR(subvol_name)) { seq_puts(seq, ",subvol="); seq_escape(seq, subvol_name, " \t\n\\"); kfree(subvol_name); } return 0; } static int btrfs_test_super(struct super_block *s, void *data) { struct btrfs_fs_info *p = data; struct btrfs_fs_info *fs_info = btrfs_sb(s); return fs_info->fs_devices == p->fs_devices; } static int btrfs_set_super(struct super_block *s, void *data) { int err = set_anon_super(s, data); if (!err) s->s_fs_info = data; return err; } /* * subvolumes are identified by ino 256 */ static inline int is_subvolume_inode(struct inode *inode) { if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return 1; return 0; } static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, struct vfsmount *mnt) { struct dentry *root; int ret; if (!subvol_name) { if (!subvol_objectid) { ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), &subvol_objectid); if (ret) { root = ERR_PTR(ret); goto out; } } subvol_name = btrfs_get_subvol_name_from_objectid( btrfs_sb(mnt->mnt_sb), subvol_objectid); if (IS_ERR(subvol_name)) { root = ERR_CAST(subvol_name); subvol_name = NULL; goto out; } } root = mount_subtree(mnt, subvol_name); /* mount_subtree() drops our reference on the vfsmount. */ mnt = NULL; if (!IS_ERR(root)) { struct super_block *s = root->d_sb; struct btrfs_fs_info *fs_info = btrfs_sb(s); struct inode *root_inode = d_inode(root); u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid; ret = 0; if (!is_subvolume_inode(root_inode)) { btrfs_err(fs_info, "'%s' is not a valid subvolume", subvol_name); ret = -EINVAL; } if (subvol_objectid && root_objectid != subvol_objectid) { /* * This will also catch a race condition where a * subvolume which was passed by ID is renamed and * another subvolume is renamed over the old location. */ btrfs_err(fs_info, "subvol '%s' does not match subvolid %llu", subvol_name, subvol_objectid); ret = -EINVAL; } if (ret) { dput(root); root = ERR_PTR(ret); deactivate_locked_super(s); } } out: mntput(mnt); kfree(subvol_name); return root; } /* * Find a superblock for the given device / mount point. * * Note: This is based on mount_bdev from fs/super.c with a few additions * for multiple device setup. Make sure to keep it in sync. */ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { struct block_device *bdev = NULL; struct super_block *s; struct btrfs_device *device = NULL; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; void *new_sec_opts = NULL; blk_mode_t mode = sb_open_mode(flags); int error = 0; if (data) { error = security_sb_eat_lsm_opts(data, &new_sec_opts); if (error) return ERR_PTR(error); } /* * Setup a dummy root and fs_info for test/set super. This is because * we don't actually fill this stuff out until open_ctree, but we need * then open_ctree will properly initialize the file system specific * settings later. 
btrfs_init_fs_info initializes the static elements * of the fs_info (locks and such) to make cleanup easier if we find a * superblock with our given fs_devices later on at sget() time. */ fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); if (!fs_info) { error = -ENOMEM; goto error_sec_opts; } btrfs_init_fs_info(fs_info); fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { error = -ENOMEM; goto error_fs_info; } mutex_lock(&uuid_mutex); error = btrfs_parse_device_options(data, mode); if (error) { mutex_unlock(&uuid_mutex); goto error_fs_info; } /* * With 'true' passed to btrfs_scan_one_device() (mount time) we expect * either a valid device or an error. */ device = btrfs_scan_one_device(device_name, mode, true); ASSERT(device != NULL); if (IS_ERR(device)) { mutex_unlock(&uuid_mutex); error = PTR_ERR(device); goto error_fs_info; } fs_devices = device->fs_devices; fs_info->fs_devices = fs_devices; error = btrfs_open_devices(fs_devices, mode, fs_type); mutex_unlock(&uuid_mutex); if (error) goto error_fs_info; if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { error = -EACCES; goto error_close_devices; } bdev = fs_devices->latest_dev->bdev; s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); goto error_close_devices; } if (s->s_root) { btrfs_close_devices(fs_devices); btrfs_free_fs_info(fs_info); if ((flags ^ s->s_flags) & SB_RDONLY) error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); } if (!error) error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL); security_free_mnt_opts(&new_sec_opts); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } return dget(s->s_root); error_close_devices: btrfs_close_devices(fs_devices); error_fs_info: btrfs_free_fs_info(fs_info); error_sec_opts: security_free_mnt_opts(&new_sec_opts); return ERR_PTR(error); } /* * Mount function which is called by VFS layer. * * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() * which needs vfsmount* of device's root (/). This means device's root has to * be mounted internally in any case. * * Operation flow: * 1. Parse subvol id related options for later use in mount_subvol(). * * 2. Mount device's root (/) by calling vfs_kern_mount(). * * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the * first place. In order to avoid calling btrfs_mount() again, we use * different file_system_type which is not registered to VFS by * register_filesystem() (btrfs_root_fs_type). As a result, * btrfs_mount_root() is called. The return value will be used by * mount_subtree() in mount_subvol(). * * 3. Call mount_subvol() to get the dentry of subvolume. Since there is * "btrfs subvolume set-default", mount_subvol() is called always. 
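* For example, "mount -t btrfs -o subvol=/snap/a /dev/sdb /mnt" (device and path purely illustrative) internally mounts the device root first and then returns the dentry of /snap/a obtained through mount_subtree() in mount_subvol().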
*/ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { struct vfsmount *mnt_root; struct dentry *root; char *subvol_name = NULL; u64 subvol_objectid = 0; int error = 0; error = btrfs_parse_subvol_options(data, &subvol_name, &subvol_objectid); if (error) { kfree(subvol_name); return ERR_PTR(error); } /* mount device's root (/) */ mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { if (flags & SB_RDONLY) { mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags & ~SB_RDONLY, device_name, data); } else { mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags | SB_RDONLY, device_name, data); if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); kfree(subvol_name); goto out; } down_write(&mnt_root->mnt_sb->s_umount); error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); up_write(&mnt_root->mnt_sb->s_umount); if (error < 0) { root = ERR_PTR(error); mntput(mnt_root); kfree(subvol_name); goto out; } } } if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); kfree(subvol_name); goto out; } /* mount_subvol() will free subvol_name and mnt_root */ root = mount_subvol(subvol_name, subvol_objectid, mnt_root); out: return root; } static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, u32 new_pool_size, u32 old_pool_size) { if (new_pool_size == old_pool_size) return; fs_info->thread_pool_size = new_pool_size; btrfs_info(fs_info, "resize thread pool %d -> %d", old_pool_size, new_pool_size); btrfs_workqueue_set_max(fs_info->workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); workqueue_set_max_active(fs_info->endio_workers, new_pool_size); workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); } static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, unsigned long old_opts, int flags) { if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || (flags & SB_RDONLY))) { /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); if (flags & SB_RDONLY) sync_filesystem(fs_info->sb); } } static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, unsigned long old_opts) { const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); /* * We need to cleanup all defragable inodes if the autodefragment is * close or the filesystem is read only. 
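* (That is, when a remount turns autodefrag off or switches the filesystem to read-only, the inodes queued for automatic defrag are dropped by btrfs_cleanup_defrag_inodes() below.)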
*/ if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) { btrfs_cleanup_defrag_inodes(fs_info); } /* If we toggled discard async */ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_resume(fs_info); else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && !btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_cleanup(fs_info); /* If we toggled space cache */ if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) btrfs_set_free_space_cache_v1_active(fs_info, cache_opt); } static int btrfs_remount(struct super_block *sb, int *flags, char *data) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); unsigned old_flags = sb->s_flags; unsigned long old_opts = fs_info->mount_opt; unsigned long old_compress_type = fs_info->compress_type; u64 old_max_inline = fs_info->max_inline; u32 old_thread_pool_size = fs_info->thread_pool_size; u32 old_metadata_ratio = fs_info->metadata_ratio; int ret; sync_filesystem(sb); set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); if (data) { void *new_sec_opts = NULL; ret = security_sb_eat_lsm_opts(data, &new_sec_opts); if (!ret) ret = security_sb_remount(sb, new_sec_opts); security_free_mnt_opts(&new_sec_opts); if (ret) goto restore; } ret = btrfs_parse_options(fs_info, data, *flags); if (ret) goto restore; ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY)); if (ret < 0) goto restore; btrfs_remount_begin(fs_info, old_opts, *flags); btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { btrfs_warn(fs_info, "remount supports changing free space tree only from ro to rw"); /* Make sure free space cache options match the state on disk */ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); } if (btrfs_free_space_cache_v1_active(fs_info)) { btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE); btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE); } } if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & SB_RDONLY) { /* * this also happens on 'umount -rf' or on shutdown, when * the filesystem is busy. */ cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); btrfs_discard_cleanup(fs_info); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); btrfs_set_sb_rdonly(sb); /* * Setting SB_RDONLY will put the cleaner thread to * sleep at the next loop if it's already active. * If it's already asleep, we'll leave unused block * groups on disk until we're mounted read-write again * unless we clean them up here. */ btrfs_delete_unused_bgs(fs_info); /* * The cleaner task could be already running before we set the * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). * We must make sure that after we finish the remount, i.e. after * we call btrfs_commit_super(), the cleaner can no longer start * a transaction - either because it was dropping a dead root, * running delayed iputs or deleting an unused block group (the * cleaner picked a block group from the list of unused block * groups before we were able to in the previous call to * btrfs_delete_unused_bgs()). 
*/ wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE); /* * We've set the superblock to RO mode, so we might have made * the cleaner task sleep without running all pending delayed * iputs. Go through all the delayed iputs here, so that if an * unmount happens without remounting RW we don't end up at * finishing close_ctree() with a non-empty list of delayed * iputs. */ btrfs_run_delayed_iputs(fs_info); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); /* * Pause the qgroup rescan worker if it is running. We don't want * it to be still running after we are in RO mode, as after that, * by the time we unmount, it might have left a transaction open, * so we would leak the transaction and/or crash. */ btrfs_qgroup_wait_for_completion(fs_info, false); ret = btrfs_commit_super(fs_info); if (ret) goto restore; } else { if (BTRFS_FS_ERROR(fs_info)) { btrfs_err(fs_info, "Remounting read-write after error is not allowed"); ret = -EINVAL; goto restore; } if (fs_info->fs_devices->rw_devices == 0) { ret = -EACCES; goto restore; } if (!btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "too many missing devices, writable remount is not allowed"); ret = -EACCES; goto restore; } if (btrfs_super_log_root(fs_info->super_copy) != 0) { btrfs_warn(fs_info, "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } /* * NOTE: when remounting with a change that does writes, don't * put it anywhere above this point, as we are not sure to be * safe to write until we pass the above checks. */ ret = btrfs_start_pre_rw_mount(fs_info); if (ret) goto restore; btrfs_clear_sb_rdonly(sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* * If we've gone from readonly -> read/write, we need to get * our sync/async discard lists in the right state. */ btrfs_discard_resume(fs_info); } out: /* * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, * since the absence of the flag means it can be toggled off by remount. 
*/ *flags |= SB_I_VERSION; wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); btrfs_clear_oneshot_options(fs_info); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); return 0; restore: /* We've hit an error - don't reset SB_RDONLY */ if (sb_rdonly(sb)) old_flags |= SB_RDONLY; if (!(old_flags & SB_RDONLY)) clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); sb->s_flags = old_flags; fs_info->mount_opt = old_opts; fs_info->compress_type = old_compress_type; fs_info->max_inline = old_max_inline; btrfs_resize_thread_pool(fs_info, old_thread_pool_size, fs_info->thread_pool_size); fs_info->metadata_ratio = old_metadata_ratio; btrfs_remount_cleanup(fs_info, old_opts); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); return ret; } /* Used to sort the devices by max_avail(descending sort) */ static int btrfs_cmp_device_free_bytes(const void *a, const void *b) { const struct btrfs_device_info *dev_info1 = a; const struct btrfs_device_info *dev_info2 = b; if (dev_info1->max_avail > dev_info2->max_avail) return -1; else if (dev_info1->max_avail < dev_info2->max_avail) return 1; return 0; } /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } /* * The helper to calc the free space on the devices that can be used to store * file data. */ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, u64 *free_bytes) { struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; u64 type; u64 avail_space; u64 min_stripe_size; int num_stripes = 1; int i = 0, nr_devices; const struct btrfs_raid_attr *rattr; /* * We aren't under the device list lock, so this is racy-ish, but good * enough for our purposes. */ nr_devices = fs_info->fs_devices->open_devices; if (!nr_devices) { smp_mb(); nr_devices = fs_info->fs_devices->open_devices; ASSERT(nr_devices); if (!nr_devices) { *free_bytes = 0; return 0; } } devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), GFP_KERNEL); if (!devices_info) return -ENOMEM; /* calc min stripe number for data space allocation */ type = btrfs_data_alloc_profile(fs_info); rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; if (type & BTRFS_BLOCK_GROUP_RAID0) num_stripes = nr_devices; else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK) num_stripes = rattr->ncopies; else if (type & BTRFS_BLOCK_GROUP_RAID10) num_stripes = 4; /* Adjust for more than 1 stripe per device */ min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) || !device->bdev || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; if (i >= nr_devices) break; avail_space = device->total_bytes - device->bytes_used; /* align with stripe_len */ avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN); /* * Ensure we have at least min_stripe_size on top of the * reserved space on the device. 
*/ if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size) continue; avail_space -= BTRFS_DEVICE_RANGE_RESERVED; devices_info[i].dev = device; devices_info[i].max_avail = avail_space; i++; } rcu_read_unlock(); nr_devices = i; btrfs_descending_sort_devices(devices_info, nr_devices); i = nr_devices - 1; avail_space = 0; while (nr_devices >= rattr->devs_min) { num_stripes = min(num_stripes, nr_devices); if (devices_info[i].max_avail >= min_stripe_size) { int j; u64 alloc_size; avail_space += devices_info[i].max_avail * num_stripes; alloc_size = devices_info[i].max_avail; for (j = i + 1 - num_stripes; j <= i; j++) devices_info[j].max_avail -= alloc_size; } i--; nr_devices--; } kfree(devices_info); *free_bytes = avail_space; return 0; } /* * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. * * If there's a redundant raid level at DATA block groups, use the respective * multiplier to scale the sizes. * * Unused device space usage is based on simulating the chunk allocator * algorithm that respects the device sizes and order of allocations. This is * a close approximation of the actual use but there are other factors that may * change the result (like a new metadata chunk). * * If metadata is exhausted, f_bavail will be 0. */ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = fs_info->super_copy; struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; u64 total_free_meta = 0; u32 bits = fs_info->sectorsize_bits; __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; u64 thresh = 0; int mixed = 0; list_for_each_entry(found, &fs_info->space_info, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { int i; total_free_data += found->disk_total - found->disk_used; total_free_data -= btrfs_account_ro_block_groups_free_space(found); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { if (!list_empty(&found->block_groups[i])) factor = btrfs_bg_type_to_factor( btrfs_raid_array[i].bg_flag); } } /* * Metadata in mixed block group profiles are accounted in data */ if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) mixed = 1; else total_free_meta += found->disk_total - found->disk_used; } total_used += found->disk_used; } buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); buf->f_blocks >>= bits; buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); /* Account global block reserve as used, it's in logical size already */ spin_lock(&block_rsv->lock); /* Mixed block groups accounting is not byte-accurate, avoid overflow */ if (buf->f_bfree >= block_rsv->size >> bits) buf->f_bfree -= block_rsv->size >> bits; else buf->f_bfree = 0; spin_unlock(&block_rsv->lock); buf->f_bavail = div_u64(total_free_data, factor); ret = btrfs_calc_avail_data_space(fs_info, &total_free_data); if (ret) return ret; buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; /* * We calculate the remaining metadata space minus global reserve. If * this is (supposedly) smaller than zero, there's no space. But this * does not hold in practice, the exhausted state happens where's still * some positive delta. So we apply some guesswork and compare the * delta to a 4M threshold. (Practically observed delta was ~2M.) 
* * We probably cannot calculate the exact threshold value because this * depends on the internal reservations requested by various * operations, so some operations that consume a few metadata will * succeed even if the Avail is zero. But this is better than the other * way around. */ thresh = SZ_4M; /* * We only want to claim there's no available space if we can no longer * allocate chunks for our metadata profile and our global reserve will * not fit in the free metadata space. If we aren't ->full then we * still can allocate chunks and thus are fine using the currently * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = BTRFS_NAME_LEN; /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); /* Mask in the root object ID too, to disambiguate subvols */ buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32; buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->root_key.objectid; return 0; } static void btrfs_kill_super(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); kill_anon_super(sb); btrfs_free_fs_info(fs_info); } static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .mount = btrfs_mount, .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; static struct file_system_type btrfs_root_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .mount = btrfs_mount_root, .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("btrfs"); static int btrfs_control_open(struct inode *inode, struct file *file) { /* * The control file's private_data is used to hold the * transaction when it is started and is used to keep * track of whether a transaction is already in progress. */ file->private_data = NULL; return 0; } /* * Used by /dev/btrfs-control for devices ioctls. */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct btrfs_ioctl_vol_args *vol; struct btrfs_device *device = NULL; dev_t devt = 0; int ret = -ENOTTY; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); vol->name[BTRFS_PATH_NAME_MAX] = '\0'; switch (cmd) { case BTRFS_IOC_SCAN_DEV: mutex_lock(&uuid_mutex); /* * Scanning outside of mount can return NULL which would turn * into 0 error code. */ device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); ret = PTR_ERR_OR_ZERO(device); mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_FORGET_DEV: if (vol->name[0] != 0) { ret = lookup_bdev(vol->name, &devt); if (ret) break; } ret = btrfs_forget_devices(devt); break; case BTRFS_IOC_DEVICES_READY: mutex_lock(&uuid_mutex); /* * Scanning outside of mount can return NULL which would turn * into 0 error code. 
*/ device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); if (IS_ERR_OR_NULL(device)) { mutex_unlock(&uuid_mutex); ret = PTR_ERR(device); break; } ret = !(device->fs_devices->num_devices == device->fs_devices->total_devices); mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_GET_SUPPORTED_FEATURES: ret = btrfs_ioctl_get_supported_features((void __user*)arg); break; } kfree(vol); return ret; } static int btrfs_freeze(struct super_block *sb) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; set_bit(BTRFS_FS_FROZEN, &fs_info->flags); /* * We don't need a barrier here, we'll wait for any transaction that * could be in progress on other threads (and do delayed iputs that * we want to avoid on a frozen filesystem), or do the commit * ourselves. */ trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ if (PTR_ERR(trans) == -ENOENT) return 0; return PTR_ERR(trans); } return btrfs_commit_transaction(trans); } static int check_dev_super(struct btrfs_device *dev) { struct btrfs_fs_info *fs_info = dev->fs_info; struct btrfs_super_block *sb; u64 last_trans; u16 csum_type; int ret = 0; /* This should be called with fs still frozen. */ ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); /* Missing dev, no need to check. */ if (!dev->bdev) return 0; /* Only need to check the primary super block. */ sb = btrfs_read_dev_one_super(dev->bdev, 0, true); if (IS_ERR(sb)) return PTR_ERR(sb); /* Verify the checksum. */ csum_type = btrfs_super_csum_type(sb); if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { btrfs_err(fs_info, "csum type changed, has %u expect %u", csum_type, btrfs_super_csum_type(fs_info->super_copy)); ret = -EUCLEAN; goto out; } if (btrfs_check_super_csum(fs_info, sb)) { btrfs_err(fs_info, "csum for on-disk super block no longer matches"); ret = -EUCLEAN; goto out; } /* Btrfs_validate_super() includes fsid check against super->fsid. */ ret = btrfs_validate_super(fs_info, sb, 0); if (ret < 0) goto out; last_trans = btrfs_get_last_trans_committed(fs_info); if (btrfs_super_generation(sb) != last_trans) { btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", btrfs_super_generation(sb), last_trans); ret = -EUCLEAN; goto out; } out: btrfs_release_disk_super(sb); return ret; } static int btrfs_unfreeze(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_device *device; int ret = 0; /* * Make sure the fs is not changed by accident (like hibernation then * modified by other OS). * If we found anything wrong, we mark the fs error immediately. * * And since the fs is frozen, no one can modify the fs yet, thus * we don't need to hold device_list_mutex. */ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { ret = check_dev_super(device); if (ret < 0) { btrfs_handle_fs_error(fs_info, ret, "super block on devid %llu got modified unexpectedly", device->devid); break; } } clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); /* * We still return 0, to allow VFS layer to unfreeze the fs even the * above checks failed. Since the fs is either fine or read-only, we're * safe to continue, without causing further damage. 
*/ return 0; } static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); /* * There should be always a valid pointer in latest_dev, it may be stale * for a short moment in case it's being deleted but still valid until * the end of RCU grace period. */ rcu_read_lock(); seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\"); rcu_read_unlock(); return 0; } static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, .evict_inode = btrfs_evict_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .free_inode = btrfs_free_inode, .statfs = btrfs_statfs, .remount_fs = btrfs_remount, .freeze_fs = btrfs_freeze, .unfreeze_fs = btrfs_unfreeze, }; static const struct file_operations btrfs_ctl_fops = { .open = btrfs_control_open, .unlocked_ioctl = btrfs_control_ioctl, .compat_ioctl = compat_ptr_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice btrfs_misc = { .minor = BTRFS_MINOR, .name = "btrfs-control", .fops = &btrfs_ctl_fops }; MODULE_ALIAS_MISCDEV(BTRFS_MINOR); MODULE_ALIAS("devname:btrfs-control"); static int __init btrfs_interface_init(void) { return misc_register(&btrfs_misc); } static __cold void btrfs_interface_exit(void) { misc_deregister(&btrfs_misc); } static int __init btrfs_print_mod_info(void) { static const char options[] = "" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif #ifdef CONFIG_BTRFS_ASSERT ", assert=on" #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY ", ref-verify=on" #endif #ifdef CONFIG_BLK_DEV_ZONED ", zoned=yes" #else ", zoned=no" #endif #ifdef CONFIG_FS_VERITY ", fsverity=yes" #else ", fsverity=no" #endif ; pr_info("Btrfs loaded%s\n", options); return 0; } static int register_btrfs(void) { return register_filesystem(&btrfs_fs_type); } static void unregister_btrfs(void) { unregister_filesystem(&btrfs_fs_type); } /* Helper structure for long init/exit functions. */ struct init_sequence { int (*init_func)(void); /* Can be NULL if the init_func doesn't need cleanup. 
*/ void (*exit_func)(void); }; static const struct init_sequence mod_init_seq[] = { { .init_func = btrfs_props_init, .exit_func = NULL, }, { .init_func = btrfs_init_sysfs, .exit_func = btrfs_exit_sysfs, }, { .init_func = btrfs_init_compress, .exit_func = btrfs_exit_compress, }, { .init_func = btrfs_init_cachep, .exit_func = btrfs_destroy_cachep, }, { .init_func = btrfs_transaction_init, .exit_func = btrfs_transaction_exit, }, { .init_func = btrfs_ctree_init, .exit_func = btrfs_ctree_exit, }, { .init_func = btrfs_free_space_init, .exit_func = btrfs_free_space_exit, }, { .init_func = extent_state_init_cachep, .exit_func = extent_state_free_cachep, }, { .init_func = extent_buffer_init_cachep, .exit_func = extent_buffer_free_cachep, }, { .init_func = btrfs_bioset_init, .exit_func = btrfs_bioset_exit, }, { .init_func = extent_map_init, .exit_func = extent_map_exit, }, { .init_func = ordered_data_init, .exit_func = ordered_data_exit, }, { .init_func = btrfs_delayed_inode_init, .exit_func = btrfs_delayed_inode_exit, }, { .init_func = btrfs_auto_defrag_init, .exit_func = btrfs_auto_defrag_exit, }, { .init_func = btrfs_delayed_ref_init, .exit_func = btrfs_delayed_ref_exit, }, { .init_func = btrfs_prelim_ref_init, .exit_func = btrfs_prelim_ref_exit, }, { .init_func = btrfs_interface_init, .exit_func = btrfs_interface_exit, }, { .init_func = btrfs_print_mod_info, .exit_func = NULL, }, { .init_func = btrfs_run_sanity_tests, .exit_func = NULL, }, { .init_func = register_btrfs, .exit_func = unregister_btrfs, } }; static bool mod_init_result[ARRAY_SIZE(mod_init_seq)]; static __always_inline void btrfs_exit_btrfs_fs(void) { int i; for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) { if (!mod_init_result[i]) continue; if (mod_init_seq[i].exit_func) mod_init_seq[i].exit_func(); mod_init_result[i] = false; } } static void __exit exit_btrfs_fs(void) { btrfs_exit_btrfs_fs(); btrfs_cleanup_fs_uuids(); } static int __init init_btrfs_fs(void) { int ret; int i; for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) { ASSERT(!mod_init_result[i]); ret = mod_init_seq[i].init_func(); if (ret < 0) { btrfs_exit_btrfs_fs(); return ret; } mod_init_result[i] = true; } return 0; } late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); MODULE_SOFTDEP("pre: crc32c"); MODULE_SOFTDEP("pre: xxhash64"); MODULE_SOFTDEP("pre: sha256"); MODULE_SOFTDEP("pre: blake2b-256"); |
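/*
 * A minimal stand-alone sketch (user-space C, hypothetical step names) of the
 * table-driven init/teardown pattern used by mod_init_seq above: init
 * functions run in order, and on failure (or at module exit) the recorded
 * exit functions run in reverse. This is only an illustration of the pattern,
 * not btrfs code.
 */
#include <stdbool.h>
#include <stdio.h>

struct init_step {
	int (*init_func)(void);
	void (*exit_func)(void);	/* may be NULL if no cleanup is needed */
};

static int step_a_init(void) { puts("a init"); return 0; }
static void step_a_exit(void) { puts("a exit"); }
static int step_b_init(void) { puts("b init"); return -1; }	/* simulate a failure */

static const struct init_step steps[] = {
	{ .init_func = step_a_init, .exit_func = step_a_exit },
	{ .init_func = step_b_init, .exit_func = NULL },
};
static bool step_done[sizeof(steps) / sizeof(steps[0])];

/* Undo completed steps in reverse order, skipping steps without cleanup. */
static void unwind_steps(void)
{
	for (int i = (int)(sizeof(steps) / sizeof(steps[0])) - 1; i >= 0; i--) {
		if (step_done[i] && steps[i].exit_func)
			steps[i].exit_func();
		step_done[i] = false;
	}
}

int main(void)
{
	for (size_t i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
		if (steps[i].init_func() < 0) {
			unwind_steps();
			return 1;
		}
		step_done[i] = true;
	}
	unwind_steps();
	return 0;
}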
886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 | // SPDX-License-Identifier: GPL-2.0-or-later /* * PPP async serial channel driver for Linux. * * Copyright 1999 Paul Mackerras. * * This driver provides the encapsulation and framing for sending * and receiving PPP frames over async serial lines. It relies on * the generic PPP layer to give it frames to send and to process * received frames. It implements the PPP line discipline. * * Part of the code in this driver was inspired by the old async-only * PPP driver, written by Michael Callahan and Al Longyear, and * subsequently hacked by Paul Mackerras. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/netdevice.h> #include <linux/poll.h> #include <linux/crc-ccitt.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/ppp_channel.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/jiffies.h> #include <linux/slab.h> #include <asm/unaligned.h> #include <linux/uaccess.h> #include <asm/string.h> #define PPP_VERSION "2.4.2" #define OBUFSIZE 4096 /* Structure for storing local state. */ struct asyncppp { struct tty_struct *tty; unsigned int flags; unsigned int state; unsigned int rbits; int mru; spinlock_t xmit_lock; spinlock_t recv_lock; unsigned long xmit_flags; u32 xaccm[8]; u32 raccm; unsigned int bytes_sent; unsigned int bytes_rcvd; struct sk_buff *tpkt; int tpkt_pos; u16 tfcs; unsigned char *optr; unsigned char *olim; unsigned long last_xmit; struct sk_buff *rpkt; int lcp_fcs; struct sk_buff_head rqueue; struct tasklet_struct tsk; refcount_t refcnt; struct completion dead; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; /* Bit numbers in xmit_flags */ #define XMIT_WAKEUP 0 #define XMIT_FULL 1 #define XMIT_BUSY 2 /* State bits */ #define SC_TOSS 1 #define SC_ESCAPE 2 #define SC_PREV_ERROR 4 /* Bits in rbits */ #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) static int flag_time = HZ; module_param(flag_time, int, 0); MODULE_PARM_DESC(flag_time, "ppp_async: interval between flagged packets (in clock ticks)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_PPP); /* * Prototypes. */ static int ppp_async_encode(struct asyncppp *ap); static int ppp_async_send(struct ppp_channel *chan, struct sk_buff *skb); static int ppp_async_push(struct asyncppp *ap); static void ppp_async_flush_output(struct asyncppp *ap); static void ppp_async_input(struct asyncppp *ap, const unsigned char *buf, const u8 *flags, int count); static int ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg); static void ppp_async_process(struct tasklet_struct *t); static void async_lcp_peek(struct asyncppp *ap, unsigned char *data, int len, int inbound); static const struct ppp_channel_ops async_ops = { .start_xmit = ppp_async_send, .ioctl = ppp_async_ioctl, }; /* * Routines implementing the PPP line discipline. 
*/ /* * We have a potential race on dereferencing tty->disc_data, * because the tty layer provides no locking at all - thus one * cpu could be running ppp_asynctty_receive while another * calls ppp_asynctty_close, which zeroes tty->disc_data and * frees the memory that ppp_asynctty_receive is using. The best * way to fix this is to use a rwlock in the tty struct, but for now * we use a single global rwlock for all ttys in ppp line discipline. * * FIXME: this is no longer true. The _close path for the ldisc is * now guaranteed to be sane. */ static DEFINE_RWLOCK(disc_data_lock); static struct asyncppp *ap_get(struct tty_struct *tty) { struct asyncppp *ap; read_lock(&disc_data_lock); ap = tty->disc_data; if (ap != NULL) refcount_inc(&ap->refcnt); read_unlock(&disc_data_lock); return ap; } static void ap_put(struct asyncppp *ap) { if (refcount_dec_and_test(&ap->refcnt)) complete(&ap->dead); } /* * Called when a tty is put into PPP line discipline. Called in process * context. */ static int ppp_asynctty_open(struct tty_struct *tty) { struct asyncppp *ap; int err; int speed; if (tty->ops->write == NULL) return -EOPNOTSUPP; err = -ENOMEM; ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (!ap) goto out; /* initialize the asyncppp structure */ ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); spin_lock_init(&ap->recv_lock); ap->xaccm[0] = ~0U; ap->xaccm[3] = 0x60000000U; ap->raccm = ~0U; ap->optr = ap->obuf; ap->olim = ap->obuf; ap->lcp_fcs = -1; skb_queue_head_init(&ap->rqueue); tasklet_setup(&ap->tsk, ppp_async_process); refcount_set(&ap->refcnt, 1); init_completion(&ap->dead); ap->chan.private = ap; ap->chan.ops = &async_ops; ap->chan.mtu = PPP_MRU; speed = tty_get_baud_rate(tty); ap->chan.speed = speed; err = ppp_register_channel(&ap->chan); if (err) goto out_free; tty->disc_data = ap; tty->receive_room = 65536; return 0; out_free: kfree(ap); out: return err; } /* * Called when the tty is put into another line discipline * or it hangs up. We have to wait for any cpu currently * executing in any of the other ppp_asynctty_* routines to * finish before we can call ppp_unregister_channel and free * the asyncppp struct. This routine must be called from * process context, not interrupt or softirq context. */ static void ppp_asynctty_close(struct tty_struct *tty) { struct asyncppp *ap; write_lock_irq(&disc_data_lock); ap = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ap) return; /* * We have now ensured that nobody can start using ap from now * on, but we have to wait for all existing users to finish. * Note that ppp_unregister_channel ensures that no calls to * our channel ops (i.e. ppp_async_send/ioctl) are in progress * by the time it returns. */ if (!refcount_dec_and_test(&ap->refcnt)) wait_for_completion(&ap->dead); tasklet_kill(&ap->tsk); ppp_unregister_channel(&ap->chan); kfree_skb(ap->rpkt); skb_queue_purge(&ap->rqueue); kfree_skb(ap->tpkt); kfree(ap); } /* * Called on tty hangup in process context. * * Wait for I/O to driver to complete and unregister PPP channel. * This is already done by the close routine, so just call that. */ static void ppp_asynctty_hangup(struct tty_struct *tty) { ppp_asynctty_close(tty); } /* * Read does nothing - no data is ever available this way. * Pppd reads and writes packets via /dev/ppp instead. 
*/ static ssize_t ppp_asynctty_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t count, void **cookie, unsigned long offset) { return -EAGAIN; } /* * Write on the tty does nothing, the packets all come in * from the ppp generic stuff. */ static ssize_t ppp_asynctty_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t count) { return -EAGAIN; } /* * Called in process context only. May be re-entered by multiple * ioctl calling threads. */ static int ppp_asynctty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct asyncppp *ap = ap_get(tty); int err, val; int __user *p = (int __user *)arg; if (!ap) return -ENXIO; err = -EFAULT; switch (cmd) { case PPPIOCGCHAN: err = -EFAULT; if (put_user(ppp_channel_index(&ap->chan), p)) break; err = 0; break; case PPPIOCGUNIT: err = -EFAULT; if (put_user(ppp_unit_number(&ap->chan), p)) break; err = 0; break; case TCFLSH: /* flush our buffers and the serial port's buffer */ if (arg == TCIOFLUSH || arg == TCOFLUSH) ppp_async_flush_output(ap); err = n_tty_ioctl_helper(tty, cmd, arg); break; case FIONREAD: val = 0; if (put_user(val, p)) break; err = 0; break; default: /* Try the various mode ioctls */ err = tty_mode_ioctl(tty, cmd, arg); } ap_put(ap); return err; } /* May sleep, don't call from interrupt level or with interrupts disabled */ static void ppp_asynctty_receive(struct tty_struct *tty, const u8 *buf, const u8 *cflags, size_t count) { struct asyncppp *ap = ap_get(tty); unsigned long flags; if (!ap) return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); if (!skb_queue_empty(&ap->rqueue)) tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); } static void ppp_asynctty_wakeup(struct tty_struct *tty) { struct asyncppp *ap = ap_get(tty); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (!ap) return; set_bit(XMIT_WAKEUP, &ap->xmit_flags); tasklet_schedule(&ap->tsk); ap_put(ap); } static struct tty_ldisc_ops ppp_ldisc = { .owner = THIS_MODULE, .num = N_PPP, .name = "ppp", .open = ppp_asynctty_open, .close = ppp_asynctty_close, .hangup = ppp_asynctty_hangup, .read = ppp_asynctty_read, .write = ppp_asynctty_write, .ioctl = ppp_asynctty_ioctl, .receive_buf = ppp_asynctty_receive, .write_wakeup = ppp_asynctty_wakeup, }; static int __init ppp_async_init(void) { int err; err = tty_register_ldisc(&ppp_ldisc); if (err != 0) printk(KERN_ERR "PPP_async: error %d registering line disc.\n", err); return err; } /* * The following routines provide the PPP channel interface. 
*/ static int ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) { struct asyncppp *ap = chan->private; void __user *argp = (void __user *)arg; int __user *p = argp; int err, val; u32 accm[8]; err = -EFAULT; switch (cmd) { case PPPIOCGFLAGS: val = ap->flags | ap->rbits; if (put_user(val, p)) break; err = 0; break; case PPPIOCSFLAGS: if (get_user(val, p)) break; ap->flags = val & ~SC_RCV_BITS; spin_lock_irq(&ap->recv_lock); ap->rbits = val & SC_RCV_BITS; spin_unlock_irq(&ap->recv_lock); err = 0; break; case PPPIOCGASYNCMAP: if (put_user(ap->xaccm[0], (u32 __user *)argp)) break; err = 0; break; case PPPIOCSASYNCMAP: if (get_user(ap->xaccm[0], (u32 __user *)argp)) break; err = 0; break; case PPPIOCGRASYNCMAP: if (put_user(ap->raccm, (u32 __user *)argp)) break; err = 0; break; case PPPIOCSRASYNCMAP: if (get_user(ap->raccm, (u32 __user *)argp)) break; err = 0; break; case PPPIOCGXASYNCMAP: if (copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm))) break; err = 0; break; case PPPIOCSXASYNCMAP: if (copy_from_user(accm, argp, sizeof(accm))) break; accm[2] &= ~0x40000000U; /* can't escape 0x5e */ accm[3] |= 0x60000000U; /* must escape 0x7d, 0x7e */ memcpy(ap->xaccm, accm, sizeof(ap->xaccm)); err = 0; break; case PPPIOCGMRU: if (put_user(ap->mru, p)) break; err = 0; break; case PPPIOCSMRU: if (get_user(val, p)) break; if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; err = 0; break; default: err = -ENOTTY; } return err; } /* * This is called at softirq level to deliver received packets * to the ppp_generic code, and to tell the ppp_generic code * if we can accept more output now. */ static void ppp_async_process(struct tasklet_struct *t) { struct asyncppp *ap = from_tasklet(ap, t, tsk); struct sk_buff *skb; /* process received packets */ while ((skb = skb_dequeue(&ap->rqueue)) != NULL) { if (skb->cb[0]) ppp_input_error(&ap->chan, 0); ppp_input(&ap->chan, skb); } /* try to push more stuff out */ if (test_bit(XMIT_WAKEUP, &ap->xmit_flags) && ppp_async_push(ap)) ppp_output_wakeup(&ap->chan); } /* * Procedures for encapsulation and framing. */ /* * Procedure to encode the data for async serial transmission. * Does octet stuffing (escaping), puts the address/control bytes * on if A/C compression is disabled, and does protocol compression. * Assumes ap->tpkt != 0 on entry. * Returns 1 if we finished the current frame, 0 otherwise. */ #define PUT_BYTE(ap, buf, c, islcp) do { \ if ((islcp && c < 0x20) || (ap->xaccm[c >> 5] & (1 << (c & 0x1f)))) {\ *buf++ = PPP_ESCAPE; \ *buf++ = c ^ PPP_TRANS; \ } else \ *buf++ = c; \ } while (0) static int ppp_async_encode(struct asyncppp *ap) { int fcs, i, count, c, proto; unsigned char *buf, *buflim; unsigned char *data; int islcp; buf = ap->obuf; ap->olim = buf; ap->optr = buf; i = ap->tpkt_pos; data = ap->tpkt->data; count = ap->tpkt->len; fcs = ap->tfcs; proto = get_unaligned_be16(data); /* * LCP packets with code values between 1 (configure-reqest) * and 7 (code-reject) must be sent as though no options * had been negotiated. */ islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7; if (i == 0) { if (islcp) async_lcp_peek(ap, data, count, 0); /* * Start of a new packet - insert the leading FLAG * character if necessary. 
*/ if (islcp || flag_time == 0 || time_after_eq(jiffies, ap->last_xmit + flag_time)) *buf++ = PPP_FLAG; ap->last_xmit = jiffies; fcs = PPP_INITFCS; /* * Put in the address/control bytes if necessary */ if ((ap->flags & SC_COMP_AC) == 0 || islcp) { PUT_BYTE(ap, buf, 0xff, islcp); fcs = PPP_FCS(fcs, 0xff); PUT_BYTE(ap, buf, 0x03, islcp); fcs = PPP_FCS(fcs, 0x03); } } /* * Once we put in the last byte, we need to put in the FCS * and closing flag, so make sure there is at least 7 bytes * of free space in the output buffer. */ buflim = ap->obuf + OBUFSIZE - 6; while (i < count && buf < buflim) { c = data[i++]; if (i == 1 && c == 0 && (ap->flags & SC_COMP_PROT)) continue; /* compress protocol field */ fcs = PPP_FCS(fcs, c); PUT_BYTE(ap, buf, c, islcp); } if (i < count) { /* * Remember where we are up to in this packet. */ ap->olim = buf; ap->tpkt_pos = i; ap->tfcs = fcs; return 0; } /* * We have finished the packet. Add the FCS and flag. */ fcs = ~fcs; c = fcs & 0xff; PUT_BYTE(ap, buf, c, islcp); c = (fcs >> 8) & 0xff; PUT_BYTE(ap, buf, c, islcp); *buf++ = PPP_FLAG; ap->olim = buf; consume_skb(ap->tpkt); ap->tpkt = NULL; return 1; } /* * Transmit-side routines. */ /* * Send a packet to the peer over an async tty line. * Returns 1 iff the packet was accepted. * If the packet was not accepted, we will call ppp_output_wakeup * at some later time. */ static int ppp_async_send(struct ppp_channel *chan, struct sk_buff *skb) { struct asyncppp *ap = chan->private; ppp_async_push(ap); if (test_and_set_bit(XMIT_FULL, &ap->xmit_flags)) return 0; /* already full */ ap->tpkt = skb; ap->tpkt_pos = 0; ppp_async_push(ap); return 1; } /* * Push as much data as possible out to the tty. */ static int ppp_async_push(struct asyncppp *ap) { int avail, sent, done = 0; struct tty_struct *tty = ap->tty; int tty_stuffed = 0; /* * We can get called recursively here if the tty write * function calls our wakeup function. This can happen * for example on a pty with both the master and slave * set to PPP line discipline. * We use the XMIT_BUSY bit to detect this and get out, * leaving the XMIT_WAKEUP bit set to tell the other * instance that it may now be able to write more now. */ if (test_and_set_bit(XMIT_BUSY, &ap->xmit_flags)) return 0; spin_lock_bh(&ap->xmit_lock); for (;;) { if (test_and_clear_bit(XMIT_WAKEUP, &ap->xmit_flags)) tty_stuffed = 0; if (!tty_stuffed && ap->optr < ap->olim) { avail = ap->olim - ap->optr; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); sent = tty->ops->write(tty, ap->optr, avail); if (sent < 0) goto flush; /* error, e.g. loss of CD */ ap->optr += sent; if (sent < avail) tty_stuffed = 1; continue; } if (ap->optr >= ap->olim && ap->tpkt) { if (ppp_async_encode(ap)) { /* finished processing ap->tpkt */ clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } continue; } /* * We haven't made any progress this time around. * Clear XMIT_BUSY to let other callers in, but * after doing so we have to check if anyone set * XMIT_WAKEUP since we last checked it. If they * did, we should try again to set XMIT_BUSY and go * around again in case XMIT_BUSY was still set when * the other caller tried. */ clear_bit(XMIT_BUSY, &ap->xmit_flags); /* any more work to do? 
if not, exit the loop */ if (!(test_bit(XMIT_WAKEUP, &ap->xmit_flags) || (!tty_stuffed && ap->tpkt))) break; /* more work to do, see if we can do it now */ if (test_and_set_bit(XMIT_BUSY, &ap->xmit_flags)) break; } spin_unlock_bh(&ap->xmit_lock); return done; flush: clear_bit(XMIT_BUSY, &ap->xmit_flags); if (ap->tpkt) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } ap->optr = ap->olim; spin_unlock_bh(&ap->xmit_lock); return done; } /* * Flush output from our internal buffers. * Called for the TCFLSH ioctl. Can be entered in parallel * but this is covered by the xmit_lock. */ static void ppp_async_flush_output(struct asyncppp *ap) { int done = 0; spin_lock_bh(&ap->xmit_lock); ap->optr = ap->olim; if (ap->tpkt != NULL) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } spin_unlock_bh(&ap->xmit_lock); if (done) ppp_output_wakeup(&ap->chan); } /* * Receive-side routines. */ /* see how many ordinary chars there are at the start of buf */ static inline int scan_ordinary(struct asyncppp *ap, const unsigned char *buf, int count) { int i, c; for (i = 0; i < count; ++i) { c = buf[i]; if (c == PPP_ESCAPE || c == PPP_FLAG || (c < 0x20 && (ap->raccm & (1 << c)) != 0)) break; } return i; } /* called when a flag is seen - do end-of-packet processing */ static void process_input_packet(struct asyncppp *ap) { struct sk_buff *skb; unsigned char *p; unsigned int len, fcs; skb = ap->rpkt; if (ap->state & (SC_TOSS | SC_ESCAPE)) goto err; if (skb == NULL) return; /* 0-length packet */ /* check the FCS */ p = skb->data; len = skb->len; if (len < 3) goto err; /* too short */ fcs = PPP_INITFCS; for (; len > 0; --len) fcs = PPP_FCS(fcs, *p++); if (fcs != PPP_GOODFCS) goto err; /* bad FCS */ skb_trim(skb, skb->len - 2); /* check for address/control and protocol compression */ p = skb->data; if (p[0] == PPP_ALLSTATIONS) { /* chop off address/control */ if (p[1] != PPP_UI || skb->len < 3) goto err; p = skb_pull(skb, 2); } /* If protocol field is not compressed, it can be LCP packet */ if (!(p[0] & 0x01)) { unsigned int proto; if (skb->len < 2) goto err; proto = (p[0] << 8) + p[1]; if (proto == PPP_LCP) async_lcp_peek(ap, p, skb->len, 1); } /* queue the frame to be processed */ skb->cb[0] = ap->state; skb_queue_tail(&ap->rqueue, skb); ap->rpkt = NULL; ap->state = 0; return; err: /* frame had an error, remember that, reset SC_TOSS & SC_ESCAPE */ ap->state = SC_PREV_ERROR; if (skb) { /* make skb appear as freshly allocated */ skb_trim(skb, 0); skb_reserve(skb, - skb_headroom(skb)); } } /* Called when the tty driver has data for us. Runs parallel with the other ldisc functions but will not be re-entered */ static void ppp_async_input(struct asyncppp *ap, const u8 *buf, const u8 *flags, int count) { struct sk_buff *skb; int c, i, j, n, s, f; unsigned char *sp; /* update bits used for 8-bit cleanness detection */ if (~ap->rbits & SC_RCV_BITS) { s = 0; for (i = 0; i < count; ++i) { c = buf[i]; if (flags && flags[i] != 0) continue; s |= (c & 0x80)? SC_RCV_B7_1: SC_RCV_B7_0; c = ((c >> 4) ^ c) & 0xf; s |= (0x6996 & (1 << c))? 
SC_RCV_ODDP: SC_RCV_EVNP; } ap->rbits |= s; } while (count > 0) { /* scan through and see how many chars we can do in bulk */ if ((ap->state & SC_ESCAPE) && buf[0] == PPP_ESCAPE) n = 1; else n = scan_ordinary(ap, buf, count); f = 0; if (flags && (ap->state & SC_TOSS) == 0) { /* check the flags to see if any char had an error */ for (j = 0; j < n; ++j) if ((f = flags[j]) != 0) break; } if (f != 0) { /* start tossing */ ap->state |= SC_TOSS; } else if (n > 0 && (ap->state & SC_TOSS) == 0) { /* stuff the chars in the skb */ skb = ap->rpkt; if (!skb) { skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; ap->rpkt = skb; } if (skb->len == 0) { /* Try to get the payload 4-byte aligned. * This should match the * PPP_ALLSTATIONS/PPP_UI/compressed tests in * process_input_packet, but we do not have * enough chars here to test buf[1] and buf[2]. */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } if (n > skb_tailroom(skb)) { /* packet overflowed MRU */ ap->state |= SC_TOSS; } else { sp = skb_put_data(skb, buf, n); if (ap->state & SC_ESCAPE) { sp[0] ^= PPP_TRANS; ap->state &= ~SC_ESCAPE; } } } if (n >= count) break; c = buf[n]; if (flags != NULL && flags[n] != 0) { ap->state |= SC_TOSS; } else if (c == PPP_FLAG) { process_input_packet(ap); } else if (c == PPP_ESCAPE) { ap->state |= SC_ESCAPE; } else if (I_IXON(ap->tty)) { if (c == START_CHAR(ap->tty)) start_tty(ap->tty); else if (c == STOP_CHAR(ap->tty)) stop_tty(ap->tty); } /* otherwise it's a char in the recv ACCM */ ++n; buf += n; if (flags) flags += n; count -= n; } return; nomem: printk(KERN_ERR "PPPasync: no memory (input pkt)\n"); ap->state |= SC_TOSS; } /* * We look at LCP frames going past so that we can notice * and react to the LCP configure-ack from the peer. * In the situation where the peer has been sent a configure-ack * already, LCP is up once it has sent its configure-ack * so the immediately following packet can be sent with the * configured LCP options. This allows us to process the following * packet correctly without pppd needing to respond quickly. * * We only respond to the received configure-ack if we have just * sent a configure-request, and the configure-ack contains the * same data (this is checked using a 16-bit crc of the data). */ #define CONFREQ 1 /* LCP code field values */ #define CONFACK 2 #define LCP_MRU 1 /* LCP option numbers */ #define LCP_ASYNCMAP 2 static void async_lcp_peek(struct asyncppp *ap, unsigned char *data, int len, int inbound) { int dlen, fcs, i, code; u32 val; data += 2; /* skip protocol bytes */ len -= 2; if (len < 4) /* 4 = code, ID, length */ return; code = data[0]; if (code != CONFACK && code != CONFREQ) return; dlen = get_unaligned_be16(data + 2); if (len < dlen) return; /* packet got truncated or length is bogus */ if (code == (inbound? CONFACK: CONFREQ)) { /* * sent confreq or received confack: * calculate the crc of the data from the ID field on. 
*/ fcs = PPP_INITFCS; for (i = 1; i < dlen; ++i) fcs = PPP_FCS(fcs, data[i]); if (!inbound) { /* outbound confreq - remember the crc for later */ ap->lcp_fcs = fcs; return; } /* received confack, check the crc */ fcs ^= ap->lcp_fcs; ap->lcp_fcs = -1; if (fcs != 0) return; } else if (inbound) return; /* not interested in received confreq */ /* process the options in the confack */ data += 4; dlen -= 4; /* data[0] is code, data[1] is length */ while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) { switch (data[0]) { case LCP_MRU: val = get_unaligned_be16(data + 2); if (inbound) ap->mru = val; else ap->chan.mtu = val; break; case LCP_ASYNCMAP: val = get_unaligned_be32(data + 2); if (inbound) ap->raccm = val; else ap->xaccm[0] = val; break; } dlen -= data[1]; data += data[1]; } } static void __exit ppp_async_cleanup(void) { tty_unregister_ldisc(&ppp_ldisc); } module_init(ppp_async_init); module_exit(ppp_async_cleanup); |
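/*
 * A stand-alone sketch (user-space C, hypothetical helper name) of the
 * octet-stuffing rule applied by PUT_BYTE() above: a byte is escaped when its
 * bit is set in the 256-bit async control-character map (PPP_FLAG and
 * PPP_ESCAPE are always forced on in xaccm[3]), or when it is a control
 * character inside an LCP frame. Escaping emits PPP_ESCAPE followed by the
 * byte XORed with PPP_TRANS. The constants match <linux/ppp_defs.h>.
 */
#include <stddef.h>
#include <stdint.h>

#define PPP_FLAG	0x7e
#define PPP_ESCAPE	0x7d
#define PPP_TRANS	0x20

/*
 * acm[8] mirrors ap->xaccm: bit (1 << (c & 0x1f)) of word c >> 5.
 * 'out' must have room for two bytes; the return value is the number of
 * bytes actually written (1 or 2).
 */
static size_t stuff_byte(uint8_t c, const uint32_t acm[8], int islcp,
			 uint8_t *out)
{
	if ((islcp && c < 0x20) || (acm[c >> 5] & (1u << (c & 0x1f)))) {
		out[0] = PPP_ESCAPE;
		out[1] = c ^ PPP_TRANS;
		return 2;
	}
	out[0] = c;
	return 1;
}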
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
 *
 * Based on ipt_random and ipt_nth by Fabrice MARIE <fabrice@netfilter.org>.
 */

#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/slab.h>

#include <linux/netfilter/xt_statistic.h>
#include <linux/netfilter/x_tables.h>
#include <linux/module.h>

struct xt_statistic_priv {
	atomic_t count;
} ____cacheline_aligned_in_smp;

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
MODULE_ALIAS("ipt_statistic");
MODULE_ALIAS("ip6t_statistic");

static bool statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_statistic_info *info = par->matchinfo;
	bool ret = info->flags & XT_STATISTIC_INVERT;
	int nval, oval;

	switch (info->mode) {
	case XT_STATISTIC_MODE_RANDOM:
		if ((get_random_u32() & 0x7FFFFFFF) < info->u.random.probability)
			ret = !ret;
		break;
	case XT_STATISTIC_MODE_NTH:
		do {
			oval = atomic_read(&info->master->count);
			nval = (oval == info->u.nth.every) ? 0 : oval + 1;
		} while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
		if (nval == 0)
			ret = !ret;
		break;
	}

	return ret;
}

static int statistic_mt_check(const struct xt_mtchk_param *par)
{
	struct xt_statistic_info *info = par->matchinfo;

	if (info->mode > XT_STATISTIC_MODE_MAX ||
	    info->flags & ~XT_STATISTIC_MASK)
		return -EINVAL;

	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
	if (info->master == NULL)
		return -ENOMEM;
	atomic_set(&info->master->count, info->u.nth.count);

	return 0;
}

static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
{
	const struct xt_statistic_info *info = par->matchinfo;

	kfree(info->master);
}

static struct xt_match xt_statistic_mt_reg __read_mostly = {
	.name       = "statistic",
	.revision   = 0,
	.family     = NFPROTO_UNSPEC,
	.match      = statistic_mt,
	.checkentry = statistic_mt_check,
	.destroy    = statistic_mt_destroy,
	.matchsize  = sizeof(struct xt_statistic_info),
	.usersize   = offsetof(struct xt_statistic_info, master),
	.me         = THIS_MODULE,
};

static int __init statistic_mt_init(void)
{
	return xt_register_match(&xt_statistic_mt_reg);
}

static void __exit statistic_mt_exit(void)
{
	xt_unregister_match(&xt_statistic_mt_reg);
}

module_init(statistic_mt_init);
module_exit(statistic_mt_exit);
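/*
 * A user-space C11 sketch of the lock-free "nth" counter used by
 * statistic_mt() above: the counter wraps back to 0 once it reaches 'every',
 * and the packet that causes the wrap is the one that matches. The function
 * and variable names here are illustrative, not part of the module.
 */
#include <stdatomic.h>
#include <stdbool.h>

static bool nth_match(atomic_int *count, int every)
{
	int oval, nval;

	/* Retry until the compare-and-swap installs our increment/wrap. */
	do {
		oval = atomic_load(count);
		nval = (oval == every) ? 0 : oval + 1;
	} while (!atomic_compare_exchange_weak(count, &oval, nval));

	return nval == 0;
}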
1316 321 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RCULIST_NULLS_H #define _LINUX_RCULIST_NULLS_H #ifdef __KERNEL__ /* * RCU-protected list version */ #include <linux/list_nulls.h> #include <linux/rcupdate.h> /** * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * * Note: hlist_nulls_unhashed() on the node return true after this. It is * useful for RCU based read lockfree traversal if the writer side * must know if the list entry is still hashed or already unhashed. * * In particular, it means that we can not poison the forward pointers * that may still be used for walking the hash list and we can only * zero the pprev pointer so list_unhashed() will return true after * this. * * The caller must take whatever precautions are necessary (such as * holding appropriate locks) to avoid racing with another * list-mutation primitive, such as hlist_nulls_add_head_rcu() or * hlist_nulls_del_rcu(), running on this same list. However, it is * perfectly legal to run concurrently with the _rcu list-traversal * primitives, such as hlist_nulls_for_each_entry_rcu(). */ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); WRITE_ONCE(n->pprev, NULL); } } /** * hlist_nulls_first_rcu - returns the first element of the hash list. * @head: the head of the list. */ #define hlist_nulls_first_rcu(head) \ (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) /** * hlist_nulls_next_rcu - returns the element of the list after @node. * @node: element of the list. */ #define hlist_nulls_next_rcu(node) \ (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: hlist_nulls_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry(). */ static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); WRITE_ONCE(n->pprev, LIST_POISON2); } /** * hlist_nulls_add_head_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_nulls, * while permitting racing traversals. 
* * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_head *h) { struct hlist_nulls_node *first = h->first; WRITE_ONCE(n->next, first); WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) WRITE_ONCE(first->pprev, &n->next); } /** * hlist_nulls_add_tail_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_nulls, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() * or hlist_nulls_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, struct hlist_nulls_head *h) { struct hlist_nulls_node *i, *last = NULL; /* Note: write side code, so rcu accessors are not needed. */ for (i = h->first; !is_a_nulls(i); i = i->next) last = i; if (last) { WRITE_ONCE(n->next, last->next); n->pprev = &last->next; rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { hlist_nulls_add_head_rcu(n, h); } } /* after that hlist_nulls_del will work */ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) { n->pprev = &n->next; n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); } /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. * * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] * [1] Documentation/memory-barriers.txt around line 1533 * [2] Documentation/RCU/rculist_nulls.rst around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ for (({barrier();}), \ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) /** * hlist_nulls_for_each_entry_safe - * iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. 
 */
#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)		\
	for (({barrier();}),							\
	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
		(!is_a_nulls(pos)) &&						\
		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);	\
		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
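The primitives above are usually combined into a lockless lookup that restarts when the walk ends on the wrong nulls marker. The sketch below is illustrative only and not part of this header; struct my_obj, my_table, my_lock and the helper names are invented for the example, and a real user would also take a reference on the object before leaving the RCU read-side critical section.

#include <linux/rculist_nulls.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_obj {
	u32			key;
	struct hlist_nulls_node	node;
};

#define MY_HASH_SIZE	256

static struct hlist_nulls_head my_table[MY_HASH_SIZE];
static DEFINE_SPINLOCK(my_lock);	/* serializes writers only */

static void my_table_init(void)
{
	unsigned int i;

	/* The nulls value of each chain encodes its bucket number. */
	for (i = 0; i < MY_HASH_SIZE; i++)
		INIT_HLIST_NULLS_HEAD(&my_table[i], i);
}

static void my_insert(struct my_obj *obj)
{
	unsigned int slot = obj->key % MY_HASH_SIZE;

	spin_lock(&my_lock);
	hlist_nulls_add_head_rcu(&obj->node, &my_table[slot]);
	spin_unlock(&my_lock);
}

static struct my_obj *my_lookup(u32 key)
{
	unsigned int slot = key % MY_HASH_SIZE;
	struct hlist_nulls_node *pos;
	struct my_obj *obj;

	rcu_read_lock();
begin:
	hlist_nulls_for_each_entry_rcu(obj, pos, &my_table[slot], node) {
		if (obj->key == key)
			goto found;
	}
	/*
	 * The walk ended on a nulls marker.  If it is not the marker of
	 * the bucket we started from, the object may have been moved to
	 * another chain while we walked, so restart the lookup.
	 */
	if (get_nulls_value(pos) != slot)
		goto begin;
	obj = NULL;
found:
	rcu_read_unlock();
	return obj;	/* a real caller would hold a reference by now */
}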
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct skb_array' datastructure. * * Author: * Michael S. Tsirkin <mst@redhat.com> * * Copyright (C) 2016 Red Hat, Inc. * * Limited-size FIFO of skbs. Can be used more or less whenever * sk_buff_head can be used, except you need to know the queue size in * advance. * Implemented as a type-safe wrapper around ptr_ring. */ #ifndef _LINUX_SKB_ARRAY_H #define _LINUX_SKB_ARRAY_H 1 #ifdef __KERNEL__ #include <linux/ptr_ring.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #endif struct skb_array { struct ptr_ring ring; }; /* Might be slightly faster than skb_array_full below, but callers invoking * this in a loop must use a compiler barrier, for example cpu_relax(). */ static inline bool __skb_array_full(struct skb_array *a) { return __ptr_ring_full(&a->ring); } static inline bool skb_array_full(struct skb_array *a) { return ptr_ring_full(&a->ring); } static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce(&a->ring, skb); } static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_irq(&a->ring, skb); } static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_bh(&a->ring, skb); } static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_any(&a->ring, skb); } /* Might be slightly faster than skb_array_empty below, but only safe if the * array is never resized. Also, callers invoking this in a loop must take care * to use a compiler barrier, for example cpu_relax(). 
*/ static inline bool __skb_array_empty(struct skb_array *a) { return __ptr_ring_empty(&a->ring); } static inline struct sk_buff *__skb_array_peek(struct skb_array *a) { return __ptr_ring_peek(&a->ring); } static inline bool skb_array_empty(struct skb_array *a) { return ptr_ring_empty(&a->ring); } static inline bool skb_array_empty_bh(struct skb_array *a) { return ptr_ring_empty_bh(&a->ring); } static inline bool skb_array_empty_irq(struct skb_array *a) { return ptr_ring_empty_irq(&a->ring); } static inline bool skb_array_empty_any(struct skb_array *a) { return ptr_ring_empty_any(&a->ring); } static inline struct sk_buff *__skb_array_consume(struct skb_array *a) { return __ptr_ring_consume(&a->ring); } static inline struct sk_buff *skb_array_consume(struct skb_array *a) { return ptr_ring_consume(&a->ring); } static inline int skb_array_consume_batched(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } static inline int skb_array_consume_batched_irq(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } static inline int skb_array_consume_batched_any(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } static inline int skb_array_consume_batched_bh(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); } static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { int len = skb->len; if (skb_vlan_tag_present(skb)) len += VLAN_HLEN; return len; } else { return 0; } } static inline int skb_array_peek_len(struct skb_array *a) { return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_irq(struct skb_array *a) { return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_bh(struct skb_array *a) { return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_any(struct skb_array *a) { return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_init(&a->ring, size, gfp); } static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } static inline void skb_array_unconsume(struct skb_array *a, struct sk_buff **skbs, int n) { ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); } static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } static inline int skb_array_resize_multiple(struct skb_array **rings, int nrings, unsigned int size, gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, nrings, size, gfp, __skb_array_destroy_skb); } static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } #endif /* _LINUX_SKB_ARRAY_H */ |
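Since every helper above is a thin typed wrapper around ptr_ring, typical use is just init, produce, consume and cleanup. The sketch below is illustrative only and not part of this header; the queue name, its 256-entry size and the drop-on-full policy are assumptions made for the example.

#include <linux/skb_array.h>
#include <linux/errno.h>

static struct skb_array example_queue;

static int example_queue_init(void)
{
	/* Room for up to 256 skbs; returns -ENOMEM on allocation failure. */
	return skb_array_init(&example_queue, 256, GFP_KERNEL);
}

static int example_enqueue(struct sk_buff *skb)
{
	/* A non-zero return means the ring is full; drop in that case. */
	if (skb_array_produce(&example_queue, skb)) {
		kfree_skb(skb);
		return -ENOBUFS;
	}
	return 0;
}

static void example_queue_cleanup(void)
{
	struct sk_buff *skb;

	/* NULL means the ring is empty. */
	while ((skb = skb_array_consume(&example_queue)) != NULL)
		kfree_skb(skb);

	/* Frees anything racing producers may still have queued. */
	skb_array_cleanup(&example_queue);
}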
/*
 * linux/fs/nls/mac-centeuro.c
 *
 * Charset maccenteuro translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
 */

/*
 * COPYRIGHT AND PERMISSION NOTICE
 *
 * Copyright 1991-2012 Unicode, Inc. All rights reserved. Distributed under
 * the Terms of Use in http://www.unicode.org/copyright.html.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of the Unicode data files and any associated documentation (the "Data
 * Files") or Unicode software and any associated documentation (the
 * "Software") to deal in the Data Files or Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, and/or sell copies of the Data Files or Software, and
 * to permit persons to whom the Data Files or Software are furnished to do
 * so, provided that (a) the above copyright notice(s) and this permission
 * notice appear with all copies of the Data Files or Software, (b) both the
 * above copyright notice(s) and this permission notice appear in associated
 * documentation, and (c) there is clear notice in each modified Data File or
 * in the Software as well as in the documentation associated with the Data
 * File(s) or Software that the data or software has been modified.
* * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or other * dealings in these Data Files or Software without prior written * authorization of the copyright holder. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80 */ 0x00c4, 0x0100, 0x0101, 0x00c9, 0x0104, 0x00d6, 0x00dc, 0x00e1, 0x0105, 0x010c, 0x00e4, 0x010d, 0x0106, 0x0107, 0x00e9, 0x0179, /* 0x90 */ 0x017a, 0x010e, 0x00ed, 0x010f, 0x0112, 0x0113, 0x0116, 0x00f3, 0x0117, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x011a, 0x011b, 0x00fc, /* 0xa0 */ 0x2020, 0x00b0, 0x0118, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 0x00ae, 0x00a9, 0x2122, 0x0119, 0x00a8, 0x2260, 0x0123, 0x012e, /* 0xb0 */ 0x012f, 0x012a, 0x2264, 0x2265, 0x012b, 0x0136, 0x2202, 0x2211, 0x0142, 0x013b, 0x013c, 0x013d, 0x013e, 0x0139, 0x013a, 0x0145, /* 0xc0 */ 0x0146, 0x0143, 0x00ac, 0x221a, 0x0144, 0x0147, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x0148, 0x0150, 0x00d5, 0x0151, 0x014c, /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 0x014d, 0x0154, 0x0155, 0x0158, 0x2039, 0x203a, 0x0159, 0x0156, /* 0xe0 */ 0x0157, 0x0160, 0x201a, 0x201e, 0x0161, 0x015a, 0x015b, 0x00c1, 0x0164, 0x0165, 0x00cd, 0x017d, 0x017e, 0x016a, 0x00d3, 0x00d4, /* 0xf0 */ 0x016b, 0x016e, 0x00da, 0x016f, 0x0170, 0x0171, 0x0172, 0x0173, 0x00dd, 0x00fd, 0x0137, 0x017b, 0x0141, 0x017c, 0x0122, 0x02c7, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 
0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xca, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0xa4, /* 0xa0-0xa7 */ 0xac, 0xa9, 0x00, 0xc7, 0xc2, 0x00, 0xa8, 0x00, /* 0xa8-0xaf */ 0xa1, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0xe7, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x83, 0x00, 0x00, 0x00, 0xea, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0xee, 0xef, 0xcd, 0x85, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0xf2, 0x00, 0x86, 0xf8, 0x00, 0xa7, /* 0xd8-0xdf */ 0x00, 0x87, 0x00, 0x00, 0x8a, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x8e, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x97, 0x99, 0x9b, 0x9a, 0xd6, /* 0xf0-0xf7 */ 0x00, 0x00, 0x9c, 0x00, 0x9f, 0xf9, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x81, 0x82, 0x00, 0x00, 0x84, 0x88, 0x8c, 0x8d, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x89, 0x8b, 0x91, 0x93, /* 0x08-0x0f */ 0x00, 0x00, 0x94, 0x95, 0x00, 0x00, 0x96, 0x98, /* 0x10-0x17 */ 0xa2, 0xab, 0x9d, 0x9e, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xfe, 0xae, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0xb1, 0xb4, 0x00, 0x00, 0xaf, 0xb0, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb5, 0xfa, /* 0x30-0x37 */ 0x00, 0xbd, 0xbe, 0xb9, 0xba, 0xbb, 0xbc, 0x00, /* 0x38-0x3f */ 0x00, 0xfc, 0xb8, 0xc1, 0xc4, 0xbf, 0xc0, 0xc5, /* 0x40-0x47 */ 0xcb, 0x00, 0x00, 0x00, 0xcf, 0xd8, 0x00, 0x00, /* 0x48-0x4f */ 0xcc, 0xce, 0x00, 0x00, 0xd9, 0xda, 0xdf, 0xe0, /* 0x50-0x57 */ 0xdb, 0xde, 0xe5, 0xe6, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xe1, 0xe4, 0x00, 0x00, 0xe8, 0xe9, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0xed, 0xf0, 0x00, 0x00, 0xf1, 0xf3, /* 0x68-0x6f */ 0xf4, 0xf5, 0xf6, 0xf7, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x8f, 0x90, 0xfb, 0xfd, 0xeb, 0xec, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page02[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0xd0, 0xd1, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd4, 0xd5, 0xe2, 0x00, 0xd2, 0xd3, 0xe3, 0x00, /* 0x18-0x1f */ 0xa0, 0x00, 0xa5, 0x00, 0x00, 0x00, 0xc9, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0xdc, 0xdd, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0xb6, 0x00, 0x00, 0x00, 0xc6, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xad, 0x00, 0x00, 0x00, 0xb2, 0xb3, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page25[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, page02, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, page21, page22, NULL, NULL, page25, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "maccenteuro", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_maccenteuro(void) { return register_nls(&table); } static void __exit exit_nls_maccenteuro(void) { unregister_nls(&table); } module_init(init_nls_maccenteuro) module_exit(exit_nls_maccenteuro) MODULE_LICENSE("Dual BSD/GPL"); |
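For context, a filesystem consumes a table like the one registered above through the generic NLS API rather than by calling uni2char()/char2uni() directly. The sketch below is illustrative only and not part of this file; example_encode_char() is an invented helper.

#include <linux/nls.h>
#include <linux/errno.h>

static int example_encode_char(wchar_t uni, unsigned char *buf, int buflen)
{
	struct nls_table *nls;
	int len;

	/* Returns NULL if the "maccenteuro" table is not available. */
	nls = load_nls("maccenteuro");
	if (!nls)
		return -EINVAL;

	/* Number of output bytes (1 here), or a negative errno. */
	len = nls->uni2char(uni, buf, buflen);

	unload_nls(nls);
	return len;
}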
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/hfsplus/ioctl.c
 *
 * Copyright (C) 2003
 * Ethan Benson <erbenson@alaska.net>
 * partially derived from linux/fs/ext2/ioctl.c
 * Copyright (C) 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * hfsplus ioctls
 */

#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include "hfsplus_fs.h"

/*
 * "Blessing" an HFS+ filesystem writes metadata to the superblock informing
 * the platform firmware which file to boot from
 */
static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
	struct hfsplus_vh *vh = sbi->s_vhdr;
	struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
	u32 cnid = (unsigned long)dentry->d_fsdata;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&sbi->vh_mutex);

	/* Directory containing the bootable system */
	vh->finder_info[0] = bvh->finder_info[0] =
		cpu_to_be32(parent_ino(dentry));

	/*
	 * Bootloader. Just using the inode here breaks in the case of
	 * hard links - the firmware wants the ID of the hard link file,
	 * but the inode points at the indirect inode
	 */
	vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(cnid);

	/* Per spec, the OS X system folder - same as finder_info[0] here */
	vh->finder_info[5] = bvh->finder_info[5] =
		cpu_to_be32(parent_ino(dentry));

	mutex_unlock(&sbi->vh_mutex);
	return 0;
}

long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case HFSPLUS_IOC_BLESS:
		return hfsplus_ioctl_bless(file, argp);
	default:
		return -ENOTTY;
	}
}
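A user-space tool exercises the interface above with a plain ioctl() on a file inside the mounted HFS+ volume. The sketch below is illustrative only and not from the kernel tree; HFSPLUS_IOC_BLESS is not exported as a uapi header, so the value used here is an assumption that must be checked against fs/hfsplus/hfsplus_fs.h.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* Assumed value - copy the real definition from fs/hfsplus/hfsplus_fs.h. */
#ifndef HFSPLUS_IOC_BLESS
#define HFSPLUS_IOC_BLESS	_IO('h', 0x80)
#endif

int main(int argc, char **argv)
{
	int fd, ret;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-hfsplus>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Needs CAP_SYS_ADMIN; the ioctl argument is currently ignored. */
	ret = ioctl(fd, HFSPLUS_IOC_BLESS, NULL);
	if (ret < 0)
		perror("ioctl(HFSPLUS_IOC_BLESS)");

	close(fd);
	return ret < 0;
}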
1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | // SPDX-License-Identifier: GPL-2.0-only // // ethtool interface for Ethernet PSE (Power Sourcing Equipment) // and PD (Powered Device) // // Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> // #include "common.h" #include "linux/pse-pd/pse.h" #include "netlink.h" #include <linux/ethtool_netlink.h> #include <linux/ethtool.h> #include <linux/phy.h> struct pse_req_info { struct ethnl_req_info base; }; struct pse_reply_data { struct ethnl_reply_data base; struct pse_control_status status; }; #define PSE_REPDATA(__reply_base) \ container_of(__reply_base, struct pse_reply_data, base) /* PSE_GET */ const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = { [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int pse_get_pse_attributes(struct net_device *dev, struct netlink_ext_ack *extack, struct pse_reply_data *data) { struct phy_device *phydev = dev->phydev; if (!phydev) { NL_SET_ERR_MSG(extack, "No PHY is attached"); return -EOPNOTSUPP; } if (!phydev->psec) { NL_SET_ERR_MSG(extack, "No PSE is attached"); return -EOPNOTSUPP; } memset(&data->status, 0, sizeof(data->status)); return pse_ethtool_get_status(phydev->psec, extack, &data->status); } static int pse_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct pse_reply_data *data = PSE_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = pse_get_pse_attributes(dev, info->extack, data); ethnl_ops_complete(dev); return ret; } static int pse_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct pse_reply_data *data = PSE_REPDATA(reply_base); const struct pse_control_status *st = &data->status; int len = 0; if (st->podl_admin_state > 0) len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */ if (st->podl_pw_status > 0) len += nla_total_size(sizeof(u32)); /* _PODL_PSE_PW_D_STATUS */ return len; } static int pse_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct pse_reply_data *data = PSE_REPDATA(reply_base); const struct pse_control_status *st = &data->status; if (st->podl_admin_state > 0 && nla_put_u32(skb, ETHTOOL_A_PODL_PSE_ADMIN_STATE, st->podl_admin_state)) return -EMSGSIZE; if (st->podl_pw_status > 0 && nla_put_u32(skb, ETHTOOL_A_PODL_PSE_PW_D_STATUS, st->podl_pw_status)) return -EMSGSIZE; return 0; } /* PSE_SET */ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] = NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED), }; static int ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info) { return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]; } static int 
ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
{
	struct net_device *dev = req_info->dev;
	struct pse_control_config config = {};
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;

	/* these values are already validated by the ethnl_pse_set_policy */
	config.admin_cotrol =
		nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);

	phydev = dev->phydev;
	if (!phydev) {
		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
		return -EOPNOTSUPP;
	}

	if (!phydev->psec) {
		NL_SET_ERR_MSG(info->extack, "No PSE is attached");
		return -EOPNOTSUPP;
	}

	/* Return errno directly - PSE has no notification */
	return pse_ethtool_set_config(phydev->psec, info->extack, &config);
}

const struct ethnl_request_ops ethnl_pse_request_ops = {
	.request_cmd		= ETHTOOL_MSG_PSE_GET,
	.reply_cmd		= ETHTOOL_MSG_PSE_GET_REPLY,
	.hdr_attr		= ETHTOOL_A_PSE_HEADER,
	.req_info_size		= sizeof(struct pse_req_info),
	.reply_data_size	= sizeof(struct pse_reply_data),

	.prepare_data		= pse_prepare_data,
	.reply_size		= pse_reply_size,
	.fill_reply		= pse_fill_reply,

	.set_validate		= ethnl_set_pse_validate,
	.set			= ethnl_set_pse,
	/* PSE has no notification */
};
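Why ethnl_set_pse() above can consume the attribute without re-checking it: declaring the attribute with NLA_POLICY_RANGE() makes the generic netlink core reject out-of-range values during message validation, before the handler runs. The sketch below is illustrative only and not part of this file; the EXAMPLE_A_* attribute space and example_set() are hypothetical reductions of the real policy and handler.

#include <net/netlink.h>
#include <linux/ethtool.h>

enum {
	EXAMPLE_A_UNSPEC,
	EXAMPLE_A_ADMIN_CONTROL,
	__EXAMPLE_A_CNT,
};
#define EXAMPLE_A_MAX (__EXAMPLE_A_CNT - 1)

/* The netlink core enforces this range while parsing the message. */
static const struct nla_policy example_policy[EXAMPLE_A_MAX + 1] = {
	[EXAMPLE_A_ADMIN_CONTROL] =
		NLA_POLICY_RANGE(NLA_U32,
				 ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
				 ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
};

static int example_set(struct nlattr **tb)
{
	u32 admin_control;

	if (!tb[EXAMPLE_A_ADMIN_CONTROL])
		return 0;	/* nothing to change */

	/* Already range-checked by the policy, so no validation here. */
	admin_control = nla_get_u32(tb[EXAMPLE_A_ADMIN_CONTROL]);

	return admin_control == ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED;
}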
6 6 1 5 6 6 6 6 6 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2013 Jie Liu. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_trans_space.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_trace.h" /* * Calculate the maximum length in bytes that would be required for a local * attribute value as large attributes out of line are not logged. */ STATIC int xfs_log_calc_max_attrsetm_res( struct xfs_mount *mp) { int size; int nblks; size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - MAXNAMELEN - 1; nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); nblks += XFS_B_TO_FSB(mp, size); nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK); return M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * nblks; } /* * Compute an alternate set of log reservation sizes for use exclusively with * minimum log size calculations. */ static void xfs_log_calc_trans_resv_for_minlogblocks( struct xfs_mount *mp, struct xfs_trans_resv *resv) { unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; /* * In the early days of rmap+reflink, we always set the rmap maxlevels * to 9 even if the AG was small enough that it would never grow to * that height. Transaction reservation sizes influence the minimum * log size calculation, which influences the size of the log that mkfs * creates. Use the old value here to ensure that newly formatted * small filesystems will mount on older kernels. */ if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; xfs_trans_resv_calc(mp, resv); if (xfs_has_reflink(mp)) { /* * In the early days of reflink, typical log operation counts * were greatly overestimated. */ resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; resv->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT_REFLINK; resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; } else if (xfs_has_rmapbt(mp)) { /* * In the early days of non-reflink rmap, the impact of rmapbt * updates on log counts were not taken into account at all. */ resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; resv->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; } /* * In the early days of reflink, we did not use deferred refcount * update log items, so log reservations must be recomputed using the * old calculations. */ resv->tr_write.tr_logres = xfs_calc_write_reservation_minlogsize(mp); resv->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation_minlogsize(mp); resv->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation_minlogsize(mp); /* Put everything back the way it was. This goes at the end. 
 */
	mp->m_rmap_maxlevels = rmap_maxlevels;
}

/*
 * Iterate over the log space reservation table to figure out and return
 * the maximum one in terms of the pre-calculated values which were done
 * at mount time.
 */
void
xfs_log_get_max_trans_res(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*max_resp)
{
	struct xfs_trans_resv	resv = {};
	struct xfs_trans_res	*resp;
	struct xfs_trans_res	*end_resp;
	unsigned int		i;
	int			log_space = 0;
	int			attr_space;

	attr_space = xfs_log_calc_max_attrsetm_res(mp);

	xfs_log_calc_trans_resv_for_minlogblocks(mp, &resv);

	resp = (struct xfs_trans_res *)&resv;
	end_resp = (struct xfs_trans_res *)(&resv + 1);
	for (i = 0; resp < end_resp; i++, resp++) {
		int	tmp = resp->tr_logcount > 1 ?
			      resp->tr_logres * resp->tr_logcount :
			      resp->tr_logres;

		trace_xfs_trans_resv_calc_minlogsize(mp, i, resp);
		if (log_space < tmp) {
			log_space = tmp;
			*max_resp = *resp;		/* struct copy */
		}
	}

	if (attr_space > log_space) {
		*max_resp = resv.tr_attrsetm;	/* struct copy */
		max_resp->tr_logres = attr_space;
	}
	trace_xfs_log_get_max_trans_res(mp, max_resp);
}

/*
 * Calculate the minimum valid log size for the given superblock configuration.
 * Used to calculate the minimum log size at mkfs time, and to determine if
 * the log is large enough or not at mount time. Returns the minimum size in
 * filesystem block size units.
 */
int
xfs_log_calc_minimum_size(
	struct xfs_mount	*mp)
{
	struct xfs_trans_res	tres = {0};
	int			max_logres;
	int			min_logblks = 0;
	int			lsunit = 0;

	xfs_log_get_max_trans_res(mp, &tres);

	max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
	if (tres.tr_logcount > 1)
		max_logres *= tres.tr_logcount;

	if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
		lsunit = BTOBB(mp->m_sb.sb_logsunit);

	/*
	 * Two factors should be taken into account for calculating the minimum
	 * log space.
	 * 1) The fundamental limitation is that no single transaction can be
	 *    larger than half the size of the log.
	 *
	 *    From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR
	 *    define, which is set to 3. That means we can definitely fit two
	 *    maximally sized transactions in the log. We'll use this same
	 *    value here.
	 *
	 * 2) If the lsunit option is specified, a transaction requires 2 LSU
	 *    for the reservation because there are two log writes that can
	 *    require padding - the transaction data and the commit record which
	 *    are written separately and both can require padding to the LSU.
	 *    Consider that we can have an active CIL reservation holding 2*LSU,
	 *    but the CIL is not over a push threshold; in this case, if we
	 *    don't have enough log space for at least one new transaction, which
	 *    includes another 2*LSU in the reservation, we will loop forever in
	 *    the log space grant procedure, i.e. xlog_grant_head_wait().
	 *
	 * Hence the log size needs to be able to contain two maximally sized
	 * and padded transactions, which is (2 * (2 * LSU + maxlres)).
	 *
	 * Also, the log size should be a multiple of the log stripe unit, so
	 * round it up to an lsunit boundary if lsunit is specified.
	 */
	if (lsunit) {
		min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
			      2 * lsunit;
	} else
		min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
	min_logblks *= XFS_MIN_LOG_FACTOR;

	return XFS_BB_TO_FSB(mp, min_logblks);
}
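To make the arithmetic in xfs_log_calc_minimum_size() concrete, the stand-alone user-space sketch below works through the lsunit branch with made-up numbers. It is illustrative only and not from the kernel tree; the reservation size, stripe unit and filesystem block size are assumptions for the example.

#include <stdio.h>
#include <stdint.h>

#define BBSIZE			512	/* basic block size, bytes */
#define XFS_MIN_LOG_FACTOR	3	/* as used by mkfs.xfs */

/* bytes -> 512-byte basic blocks, rounded up */
static uint64_t btobb(uint64_t bytes)
{
	return (bytes + BBSIZE - 1) / BBSIZE;
}

/* round x up to the next multiple of y */
static uint64_t roundup64(uint64_t x, uint64_t y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	uint64_t max_logres = 300000;		/* assumed worst-case reservation, bytes */
	uint64_t lsunit = btobb(32768);		/* assumed 32 KiB log stripe unit, in BBs */
	uint64_t fsblock = 4096;		/* assumed filesystem block size, bytes */
	uint64_t min_logblks;

	/* One maximally sized transaction, padded to the stripe unit ... */
	min_logblks = roundup64(btobb(max_logres), lsunit) + 2 * lsunit;
	/* ... and the log must hold XFS_MIN_LOG_FACTOR of those. */
	min_logblks *= XFS_MIN_LOG_FACTOR;

	printf("minimum log size: %llu basic blocks (%llu fs blocks)\n",
	       (unsigned long long)min_logblks,
	       (unsigned long long)(min_logblks * BBSIZE / fsblock));
	return 0;
}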
// SPDX-License-Identifier: GPL-2.0+
/*
 * HID driver for UC-Logic devices not fully compliant with HID standard
 *
 * Copyright (c) 2010-2014 Nikolai Kondrashov
 * Copyright (c) 2013 Martin Rusko
 */

/*
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */

#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/timer.h>
#include "usbhid/usbhid.h"
#include "hid-uclogic-params.h"

#include "hid-ids.h"

/**
 * uclogic_inrange_timeout - handle pen in-range state timeout.
 * Emulate input events normally generated when pen goes out of range for
 * tablets which don't report that.
 *
 * @t:	The timer the timeout handler is attached to, stored in a struct
 *	uclogic_drvdata.
*/ static void uclogic_inrange_timeout(struct timer_list *t) { struct uclogic_drvdata *drvdata = from_timer(drvdata, t, inrange_timer); struct input_dev *input = drvdata->pen_input; if (input == NULL) return; input_report_abs(input, ABS_PRESSURE, 0); /* If BTN_TOUCH state is changing */ if (test_bit(BTN_TOUCH, input->key)) { input_event(input, EV_MSC, MSC_SCAN, /* Digitizer Tip Switch usage */ 0xd0042); input_report_key(input, BTN_TOUCH, 0); } input_report_key(input, BTN_TOOL_PEN, 0); input_sync(input); } static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); if (drvdata->desc_ptr != NULL) { rdesc = drvdata->desc_ptr; *rsize = drvdata->desc_size; } return rdesc; } static int uclogic_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; /* Discard invalid pen usages */ if (params->pen.usage_invalid && (field->application == HID_DG_PEN)) return -1; /* Let hid-core decide what to do */ return 0; } static int uclogic_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; const char *suffix = NULL; struct hid_field *field; size_t i; const struct uclogic_params_frame *frame; /* no report associated (HID_QUIRK_MULTI_INPUT not set) */ if (!hi->report) return 0; /* * If this is the input corresponding to the pen report * in need of tweaking. */ if (hi->report->id == params->pen.id) { /* Remember the input device so we can simulate events */ drvdata->pen_input = hi->input; } /* If it's one of the frame devices */ for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { frame = ¶ms->frame_list[i]; if (hi->report->id == frame->id) { /* Assign custom suffix, if any */ suffix = frame->suffix; /* * Disable EV_MSC reports for touch ring interfaces to * make the Wacom driver pickup touch ring extents */ if (frame->touch_byte > 0) __clear_bit(EV_MSC, hi->input->evbit); } } if (!suffix) { field = hi->report->field[0]; switch (field->application) { case HID_GD_KEYBOARD: suffix = "Keyboard"; break; case HID_GD_MOUSE: suffix = "Mouse"; break; case HID_GD_KEYPAD: suffix = "Pad"; break; case HID_DG_PEN: case HID_DG_DIGITIZER: suffix = "Pen"; break; case HID_CP_CONSUMER_CONTROL: suffix = "Consumer Control"; break; case HID_GD_SYSTEM_CONTROL: suffix = "System Control"; break; } } if (suffix) hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); return 0; } static int uclogic_probe(struct hid_device *hdev, const struct hid_device_id *id) { int rc; struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; if (!hid_is_usb(hdev)) return -EINVAL; /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. 
*/ hdev->quirks |= HID_QUIRK_MULTI_INPUT; hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); if (drvdata == NULL) { rc = -ENOMEM; goto failure; } timer_setup(&drvdata->inrange_timer, uclogic_inrange_timeout, 0); drvdata->re_state = U8_MAX; drvdata->quirks = id->driver_data; hid_set_drvdata(hdev, drvdata); /* Initialize the device and retrieve interface parameters */ rc = uclogic_params_init(&drvdata->params, hdev); if (rc != 0) { hid_err(hdev, "failed probing parameters: %d\n", rc); goto failure; } params_initialized = true; hid_dbg(hdev, "parameters:\n"); uclogic_params_hid_dbg(hdev, &drvdata->params); if (drvdata->params.invalid) { hid_info(hdev, "interface is invalid, ignoring\n"); rc = -ENODEV; goto failure; } /* Generate replacement report descriptor */ rc = uclogic_params_get_desc(&drvdata->params, &drvdata->desc_ptr, &drvdata->desc_size); if (rc) { hid_err(hdev, "failed generating replacement report descriptor: %d\n", rc); goto failure; } rc = hid_parse(hdev); if (rc) { hid_err(hdev, "parse failed\n"); goto failure; } rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (rc) { hid_err(hdev, "hw start failed\n"); goto failure; } return 0; failure: /* Assume "remove" might not be called if "probe" failed */ if (params_initialized) uclogic_params_cleanup(&drvdata->params); return rc; } #ifdef CONFIG_PM static int uclogic_resume(struct hid_device *hdev) { int rc; struct uclogic_params params; /* Re-initialize the device, but discard parameters */ rc = uclogic_params_init(¶ms, hdev); if (rc != 0) hid_err(hdev, "failed to re-initialize the device\n"); else uclogic_params_cleanup(¶ms); return rc; } #endif /** * uclogic_exec_event_hook - if the received event is hooked schedules the * associated work. * * @p: Tablet interface report parameters. * @event: Raw event. * @size: The size of event. * * Returns: * Whether the event was hooked or not. */ static bool uclogic_exec_event_hook(struct uclogic_params *p, u8 *event, int size) { struct uclogic_raw_event_hook *curr; if (!p->event_hooks) return false; list_for_each_entry(curr, &p->event_hooks->list, list) { if (curr->size == size && memcmp(curr->event, event, size) == 0) { schedule_work(&curr->work); return true; } } return false; } /** * uclogic_raw_event_pen - handle raw pen events (pen HID reports). * * @drvdata: Driver data. * @data: Report data buffer, can be modified. * @size: Report data size, bytes. * * Returns: * Negative value on error (stops event delivery), zero for success. 
*/ static int uclogic_raw_event_pen(struct uclogic_drvdata *drvdata, u8 *data, int size) { struct uclogic_params_pen *pen = &drvdata->params.pen; WARN_ON(drvdata == NULL); WARN_ON(data == NULL && size != 0); /* If in-range reports are inverted */ if (pen->inrange == UCLOGIC_PARAMS_PEN_INRANGE_INVERTED) { /* Invert the in-range bit */ data[1] ^= 0x40; } /* * If report contains fragmented high-resolution pen * coordinates */ if (size >= 10 && pen->fragmented_hires) { u8 pressure_low_byte; u8 pressure_high_byte; /* Lift pressure bytes */ pressure_low_byte = data[6]; pressure_high_byte = data[7]; /* * Move Y coord to make space for high-order X * coord byte */ data[6] = data[5]; data[5] = data[4]; /* Move high-order X coord byte */ data[4] = data[8]; /* Move high-order Y coord byte */ data[7] = data[9]; /* Place pressure bytes */ data[8] = pressure_low_byte; data[9] = pressure_high_byte; } /* If we need to emulate in-range detection */ if (pen->inrange == UCLOGIC_PARAMS_PEN_INRANGE_NONE) { /* Set in-range bit */ data[1] |= 0x40; /* (Re-)start in-range timeout */ mod_timer(&drvdata->inrange_timer, jiffies + msecs_to_jiffies(100)); } /* If we report tilt and Y direction is flipped */ if (size >= 12 && pen->tilt_y_flipped) data[11] = -data[11]; return 0; } /** * uclogic_raw_event_frame - handle raw frame events (frame HID reports). * * @drvdata: Driver data. * @frame: The parameters of the frame controls to handle. * @data: Report data buffer, can be modified. * @size: Report data size, bytes. * * Returns: * Negative value on error (stops event delivery), zero for success. */ static int uclogic_raw_event_frame( struct uclogic_drvdata *drvdata, const struct uclogic_params_frame *frame, u8 *data, int size) { WARN_ON(drvdata == NULL); WARN_ON(data == NULL && size != 0); /* If need to, and can, set pad device ID for Wacom drivers */ if (frame->dev_id_byte > 0 && frame->dev_id_byte < size) { /* If we also have a touch ring and the finger left it */ if (frame->touch_byte > 0 && frame->touch_byte < size && data[frame->touch_byte] == 0) { data[frame->dev_id_byte] = 0; } else { data[frame->dev_id_byte] = 0xf; } } /* If need to, and can, read rotary encoder state change */ if (frame->re_lsb > 0 && frame->re_lsb / 8 < size) { unsigned int byte = frame->re_lsb / 8; unsigned int bit = frame->re_lsb % 8; u8 change; u8 prev_state = drvdata->re_state; /* Read Gray-coded state */ u8 state = (data[byte] >> bit) & 0x3; /* Encode state change into 2-bit signed integer */ if ((prev_state == 1 && state == 0) || (prev_state == 2 && state == 3)) { change = 1; } else if ((prev_state == 2 && state == 0) || (prev_state == 1 && state == 3)) { change = 3; } else { change = 0; } /* Write change */ data[byte] = (data[byte] & ~((u8)3 << bit)) | (change << bit); /* Remember state */ drvdata->re_state = state; } /* If need to, and can, transform the touch ring reports */ if (frame->touch_byte > 0 && frame->touch_byte < size) { __s8 value = data[frame->touch_byte]; if (value != 0) { if (frame->touch_flip_at != 0) { value = frame->touch_flip_at - value; if (value <= 0) value = frame->touch_max + value; } data[frame->touch_byte] = value - 1; } } /* If need to, and can, transform the bitmap dial reports */ if (frame->bitmap_dial_byte > 0 && frame->bitmap_dial_byte < size) { if (data[frame->bitmap_dial_byte] == 2) data[frame->bitmap_dial_byte] = -1; } return 0; } static int uclogic_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { unsigned int report_id = report->id; struct uclogic_drvdata *drvdata = 
hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; struct uclogic_params_pen_subreport *subreport; struct uclogic_params_pen_subreport *subreport_list_end; size_t i; /* Do not handle anything but input reports */ if (report->type != HID_INPUT_REPORT) return 0; if (uclogic_exec_event_hook(params, data, size)) return 0; while (true) { /* Tweak pen reports, if necessary */ if ((report_id == params->pen.id) && (size >= 2)) { subreport_list_end = params->pen.subreport_list + ARRAY_SIZE(params->pen.subreport_list); /* Try to match a subreport */ for (subreport = params->pen.subreport_list; subreport < subreport_list_end; subreport++) { if (subreport->value != 0 && subreport->value == data[1]) { break; } } /* If a subreport matched */ if (subreport < subreport_list_end) { /* Change to subreport ID, and restart */ report_id = data[0] = subreport->id; continue; } else { return uclogic_raw_event_pen(drvdata, data, size); } } /* Tweak frame control reports, if necessary */ for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { if (report_id == params->frame_list[i].id) { return uclogic_raw_event_frame( drvdata, &params->frame_list[i], data, size); } } break; } return 0; } static void uclogic_remove(struct hid_device *hdev) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); del_timer_sync(&drvdata->inrange_timer); hid_hw_stop(hdev); kfree(drvdata->desc_ptr); uclogic_params_cleanup(&drvdata->params); } static const struct hid_device_id uclogic_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET2) }, { HID_USB_DEVICE(USB_VENDOR_ID_TRUST, USB_DEVICE_ID_TRUST_PANORA_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_81) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_47) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GT5040) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_PARBLO_A610_PRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_G5) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_RAINBOW_CV720) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2) }, {
HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), .driver_data = UCLOGIC_MOUSE_FRAME_QUIRK | UCLOGIC_BATTERY_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW), .driver_data = UCLOGIC_MOUSE_FRAME_QUIRK | UCLOGIC_BATTERY_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); static struct hid_driver uclogic_driver = { .name = "uclogic", .id_table = uclogic_devices, .probe = uclogic_probe, .remove = uclogic_remove, .report_fixup = uclogic_report_fixup, .raw_event = uclogic_raw_event, .input_mapping = uclogic_input_mapping, .input_configured = uclogic_input_configured, #ifdef CONFIG_PM .resume = uclogic_resume, .reset_resume = uclogic_resume, #endif }; module_hid_driver(uclogic_driver); MODULE_AUTHOR("Martin Rusko"); MODULE_AUTHOR("Nikolai Kondrashov"); MODULE_LICENSE("GPL"); #ifdef CONFIG_HID_KUNIT_TEST #include "hid-uclogic-core-test.c" #endif
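/*
 * Illustrative aside, not part of the driver above: uclogic_raw_event_frame()
 * re-encodes the tablet's 2-bit Gray-coded rotary-encoder state into a 2-bit
 * signed relative step in place (0, +1, or -1 stored as 0x3).  The standalone
 * userspace sketch below applies the same transition table to a captured
 * sequence of states to accumulate a net rotation count.  The helper name
 * uclogic_re_step() and the sample state stream are assumptions made for this
 * example only; they are not kernel APIs or recorded device data.
 */
#include <stdint.h>
#include <stdio.h>

/* Map a (previous, current) Gray-code state pair to a step of -1, 0 or +1. */
static int uclogic_re_step(uint8_t prev, uint8_t cur)
{
	/* Transitions the driver encodes as +1 */
	if ((prev == 1 && cur == 0) || (prev == 2 && cur == 3))
		return 1;
	/* Transitions the driver encodes as -1 (0x3 in the 2-bit field) */
	if ((prev == 2 && cur == 0) || (prev == 1 && cur == 3))
		return -1;
	/* No change, contact bounce, or a skipped state: ignore */
	return 0;
}

int main(void)
{
	/* Hypothetical Gray-code state stream sampled from successive reports */
	static const uint8_t states[] = { 0, 1, 0, 1, 0, 2, 0, 2, 3, 2 };
	uint8_t prev = states[0];
	int position = 0;
	size_t i;

	for (i = 1; i < sizeof(states) / sizeof(states[0]); i++) {
		position += uclogic_re_step(prev, states[i]);
		prev = states[i];
	}
	printf("accumulated rotation: %d steps\n", position);
	return 0;
}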
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer / OSS compatible * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #if 0 #define PLUGIN_DEBUG #endif #if 0 #define OSS_DEBUG #endif #include <linux/init.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/vmalloc.h> #include <linux/module.h> #include <linux/math64.h> #include <linux/string.h> #include <linux/compat.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "pcm_plugin.h" #include <sound/info.h> #include <linux/soundcard.h> #include <sound/initval.h> #include <sound/mixer_oss.h> #define OSS_ALSAEMULVER _SIOR ('M', 249, int) static int dsp_map[SNDRV_CARDS]; static int adsp_map[SNDRV_CARDS] = {[0 ...
(SNDRV_CARDS-1)] = 1}; static bool nonblock_open = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Abramo Bagnara <abramo@alsa-project.org>"); MODULE_DESCRIPTION("PCM OSS emulation for ALSA."); MODULE_LICENSE("GPL"); module_param_array(dsp_map, int, NULL, 0444); MODULE_PARM_DESC(dsp_map, "PCM device number assigned to 1st OSS device."); module_param_array(adsp_map, int, NULL, 0444); MODULE_PARM_DESC(adsp_map, "PCM device number assigned to 2nd OSS device."); module_param(nonblock_open, bool, 0644); MODULE_PARM_DESC(nonblock_open, "Don't block opening busy PCM devices."); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_PCM); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_PCM1); static int snd_pcm_oss_get_rate(struct snd_pcm_oss_file *pcm_oss_file); static int snd_pcm_oss_get_channels(struct snd_pcm_oss_file *pcm_oss_file); static int snd_pcm_oss_get_format(struct snd_pcm_oss_file *pcm_oss_file); /* * helper functions to process hw_params */ static int snd_interval_refine_min(struct snd_interval *i, unsigned int min, int openmin) { int changed = 0; if (i->min < min) { i->min = min; i->openmin = openmin; changed = 1; } else if (i->min == min && !i->openmin && openmin) { i->openmin = 1; changed = 1; } if (i->integer) { if (i->openmin) { i->min++; i->openmin = 0; } } if (snd_interval_checkempty(i)) { snd_interval_none(i); return -EINVAL; } return changed; } static int snd_interval_refine_max(struct snd_interval *i, unsigned int max, int openmax) { int changed = 0; if (i->max > max) { i->max = max; i->openmax = openmax; changed = 1; } else if (i->max == max && !i->openmax && openmax) { i->openmax = 1; changed = 1; } if (i->integer) { if (i->openmax) { i->max--; i->openmax = 0; } } if (snd_interval_checkempty(i)) { snd_interval_none(i); return -EINVAL; } return changed; } static int snd_interval_refine_set(struct snd_interval *i, unsigned int val) { struct snd_interval t; t.empty = 0; t.min = t.max = val; t.openmin = t.openmax = 0; t.integer = 1; return snd_interval_refine(i, &t); } /** * snd_pcm_hw_param_value_min * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or NULL * * Return the minimum value for field PAR. */ static unsigned int snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { if (hw_is_mask(var)) { if (dir) *dir = 0; return snd_mask_min(hw_param_mask_c(params, var)); } if (hw_is_interval(var)) { const struct snd_interval *i = hw_param_interval_c(params, var); if (dir) *dir = i->openmin; return snd_interval_min(i); } return -EINVAL; } /** * snd_pcm_hw_param_value_max * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or NULL * * Return the maximum value for field PAR. 
*/ static int snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { if (hw_is_mask(var)) { if (dir) *dir = 0; return snd_mask_max(hw_param_mask_c(params, var)); } if (hw_is_interval(var)) { const struct snd_interval *i = hw_param_interval_c(params, var); if (dir) *dir = - (int) i->openmax; return snd_interval_max(i); } return -EINVAL; } static int _snd_pcm_hw_param_mask(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, const struct snd_mask *val) { int changed; changed = snd_mask_refine(hw_param_mask(params, var), val); if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } static int snd_pcm_hw_param_mask(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, const struct snd_mask *val) { int changed = _snd_pcm_hw_param_mask(params, var, val); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return 0; } static int _snd_pcm_hw_param_min(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; int open = 0; if (dir) { if (dir > 0) { open = 1; } else if (dir < 0) { if (val > 0) { open = 1; val--; } } } if (hw_is_mask(var)) changed = snd_mask_refine_min(hw_param_mask(params, var), val + !!open); else if (hw_is_interval(var)) changed = snd_interval_refine_min(hw_param_interval(params, var), val, open); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_min * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: minimal value * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values < VAL. Reduce configuration space accordingly. * Return new minimum or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_min(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int *dir) { int changed = _snd_pcm_hw_param_min(params, var, val, dir ? *dir : 0); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value_min(params, var, dir); } static int _snd_pcm_hw_param_max(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; int open = 0; if (dir) { if (dir < 0) { open = 1; } else if (dir > 0) { open = 1; val++; } } if (hw_is_mask(var)) { if (val == 0 && open) { snd_mask_none(hw_param_mask(params, var)); changed = -EINVAL; } else changed = snd_mask_refine_max(hw_param_mask(params, var), val - !!open); } else if (hw_is_interval(var)) changed = snd_interval_refine_max(hw_param_interval(params, var), val, open); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_max * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: maximal value * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values >= VAL + 1. Reduce configuration space accordingly. 
* Return new maximum or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_max(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int *dir) { int changed = _snd_pcm_hw_param_max(params, var, val, dir ? *dir : 0); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value_max(params, var, dir); } static int boundary_sub(int a, int adir, int b, int bdir, int *c, int *cdir) { adir = adir < 0 ? -1 : (adir > 0 ? 1 : 0); bdir = bdir < 0 ? -1 : (bdir > 0 ? 1 : 0); *c = a - b; *cdir = adir - bdir; if (*cdir == -2) { (*c)--; } else if (*cdir == 2) { (*c)++; } return 0; } static int boundary_lt(unsigned int a, int adir, unsigned int b, int bdir) { if (adir < 0) { a--; adir = 1; } else if (adir > 0) adir = 1; if (bdir < 0) { b--; bdir = 1; } else if (bdir > 0) bdir = 1; return a < b || (a == b && adir < bdir); } /* Return 1 if min is nearer to best than max */ static int boundary_nearer(int min, int mindir, int best, int bestdir, int max, int maxdir) { int dmin, dmindir; int dmax, dmaxdir; boundary_sub(best, bestdir, min, mindir, &dmin, &dmindir); boundary_sub(max, maxdir, best, bestdir, &dmax, &dmaxdir); return boundary_lt(dmin, dmindir, dmax, dmaxdir); } /** * snd_pcm_hw_param_near * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @best: value to set * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS set PAR to the available value * nearest to VAL. Reduce configuration space accordingly. * This function cannot be called for SNDRV_PCM_HW_PARAM_ACCESS, * SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SUBFORMAT. * Return the value found. */ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int best, int *dir) { struct snd_pcm_hw_params *save = NULL; int v; unsigned int saved_min; int last = 0; int min, max; int mindir, maxdir; int valdir = dir ? 
*dir : 0; /* FIXME */ if (best > INT_MAX) best = INT_MAX; min = max = best; mindir = maxdir = valdir; if (maxdir > 0) maxdir = 0; else if (maxdir == 0) maxdir = -1; else { maxdir = 1; max--; } save = kmalloc(sizeof(*save), GFP_KERNEL); if (save == NULL) return -ENOMEM; *save = *params; saved_min = min; min = snd_pcm_hw_param_min(pcm, params, var, min, &mindir); if (min >= 0) { struct snd_pcm_hw_params *params1; if (max < 0) goto _end; if ((unsigned int)min == saved_min && mindir == valdir) goto _end; params1 = kmalloc(sizeof(*params1), GFP_KERNEL); if (params1 == NULL) { kfree(save); return -ENOMEM; } *params1 = *save; max = snd_pcm_hw_param_max(pcm, params1, var, max, &maxdir); if (max < 0) { kfree(params1); goto _end; } if (boundary_nearer(max, maxdir, best, valdir, min, mindir)) { *params = *params1; last = 1; } kfree(params1); } else { *params = *save; max = snd_pcm_hw_param_max(pcm, params, var, max, &maxdir); if (max < 0) { kfree(save); return max; } last = 1; } _end: kfree(save); if (last) v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); return v; } static int _snd_pcm_hw_param_set(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; if (hw_is_mask(var)) { struct snd_mask *m = hw_param_mask(params, var); if (val == 0 && dir < 0) { changed = -EINVAL; snd_mask_none(m); } else { if (dir > 0) val++; else if (dir < 0) val--; changed = snd_mask_refine_set(hw_param_mask(params, var), val); } } else if (hw_is_interval(var)) { struct snd_interval *i = hw_param_interval(params, var); if (val == 0 && dir < 0) { changed = -EINVAL; snd_interval_none(i); } else if (dir == 0) changed = snd_interval_refine_set(i, val); else { struct snd_interval t; t.openmin = 1; t.openmax = 1; t.empty = 0; t.integer = 0; if (dir < 0) { t.min = val - 1; t.max = val; } else { t.min = val; t.max = val+1; } changed = snd_interval_refine(i, &t); } } else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_set * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: value to set * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values != VAL. Reduce configuration space accordingly. 
* Return VAL or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_set(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed = _snd_pcm_hw_param_set(params, var, val, dir); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, NULL); } static int _snd_pcm_hw_param_setinteger(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { int changed; changed = snd_interval_setinteger(hw_param_interval(params, var)); if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /* * plugin */ #ifdef CONFIG_SND_PCM_OSS_PLUGINS static int snd_pcm_oss_plugin_clear(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_plugin *plugin, *next; plugin = runtime->oss.plugin_first; while (plugin) { next = plugin->next; snd_pcm_plugin_free(plugin); plugin = next; } runtime->oss.plugin_first = runtime->oss.plugin_last = NULL; return 0; } static int snd_pcm_plugin_insert(struct snd_pcm_plugin *plugin) { struct snd_pcm_runtime *runtime = plugin->plug->runtime; plugin->next = runtime->oss.plugin_first; plugin->prev = NULL; if (runtime->oss.plugin_first) { runtime->oss.plugin_first->prev = plugin; runtime->oss.plugin_first = plugin; } else { runtime->oss.plugin_last = runtime->oss.plugin_first = plugin; } return 0; } int snd_pcm_plugin_append(struct snd_pcm_plugin *plugin) { struct snd_pcm_runtime *runtime = plugin->plug->runtime; plugin->next = NULL; plugin->prev = runtime->oss.plugin_last; if (runtime->oss.plugin_last) { runtime->oss.plugin_last->next = plugin; runtime->oss.plugin_last = plugin; } else { runtime->oss.plugin_last = runtime->oss.plugin_first = plugin; } return 0; } #endif /* CONFIG_SND_PCM_OSS_PLUGINS */ static long snd_pcm_oss_bytes(struct snd_pcm_substream *substream, long frames) { struct snd_pcm_runtime *runtime = substream->runtime; long buffer_size = snd_pcm_lib_buffer_bytes(substream); long bytes = frames_to_bytes(runtime, frames); if (buffer_size == runtime->oss.buffer_bytes) return bytes; #if BITS_PER_LONG >= 64 return runtime->oss.buffer_bytes * bytes / buffer_size; #else { u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes; return div_u64(bsize, buffer_size); } #endif } static long snd_pcm_alsa_frames(struct snd_pcm_substream *substream, long bytes) { struct snd_pcm_runtime *runtime = substream->runtime; long buffer_size = snd_pcm_lib_buffer_bytes(substream); if (buffer_size == runtime->oss.buffer_bytes) return bytes_to_frames(runtime, bytes); return bytes_to_frames(runtime, (buffer_size * bytes) / runtime->oss.buffer_bytes); } static inline snd_pcm_uframes_t get_hw_ptr_period(struct snd_pcm_runtime *runtime) { return runtime->hw_ptr_interrupt; } /* define extended formats in the recent OSS versions (if any) */ /* linear formats */ #define AFMT_S32_LE 0x00001000 #define AFMT_S32_BE 0x00002000 #define AFMT_S24_LE 0x00008000 #define AFMT_S24_BE 0x00010000 #define AFMT_S24_PACKED 0x00040000 /* other supported formats */ #define AFMT_FLOAT 0x00004000 #define AFMT_SPDIF_RAW 0x00020000 /* unsupported formats */ #define AFMT_AC3 0x00000400 #define AFMT_VORBIS 0x00000800 static snd_pcm_format_t snd_pcm_oss_format_from(int format) { switch (format) { case AFMT_MU_LAW: return SNDRV_PCM_FORMAT_MU_LAW; case AFMT_A_LAW: return SNDRV_PCM_FORMAT_A_LAW; case AFMT_IMA_ADPCM: return 
SNDRV_PCM_FORMAT_IMA_ADPCM; case AFMT_U8: return SNDRV_PCM_FORMAT_U8; case AFMT_S16_LE: return SNDRV_PCM_FORMAT_S16_LE; case AFMT_S16_BE: return SNDRV_PCM_FORMAT_S16_BE; case AFMT_S8: return SNDRV_PCM_FORMAT_S8; case AFMT_U16_LE: return SNDRV_PCM_FORMAT_U16_LE; case AFMT_U16_BE: return SNDRV_PCM_FORMAT_U16_BE; case AFMT_MPEG: return SNDRV_PCM_FORMAT_MPEG; case AFMT_S32_LE: return SNDRV_PCM_FORMAT_S32_LE; case AFMT_S32_BE: return SNDRV_PCM_FORMAT_S32_BE; case AFMT_S24_LE: return SNDRV_PCM_FORMAT_S24_LE; case AFMT_S24_BE: return SNDRV_PCM_FORMAT_S24_BE; case AFMT_S24_PACKED: return SNDRV_PCM_FORMAT_S24_3LE; case AFMT_FLOAT: return SNDRV_PCM_FORMAT_FLOAT; case AFMT_SPDIF_RAW: return SNDRV_PCM_FORMAT_IEC958_SUBFRAME; default: return SNDRV_PCM_FORMAT_U8; } } static int snd_pcm_oss_format_to(snd_pcm_format_t format) { switch (format) { case SNDRV_PCM_FORMAT_MU_LAW: return AFMT_MU_LAW; case SNDRV_PCM_FORMAT_A_LAW: return AFMT_A_LAW; case SNDRV_PCM_FORMAT_IMA_ADPCM: return AFMT_IMA_ADPCM; case SNDRV_PCM_FORMAT_U8: return AFMT_U8; case SNDRV_PCM_FORMAT_S16_LE: return AFMT_S16_LE; case SNDRV_PCM_FORMAT_S16_BE: return AFMT_S16_BE; case SNDRV_PCM_FORMAT_S8: return AFMT_S8; case SNDRV_PCM_FORMAT_U16_LE: return AFMT_U16_LE; case SNDRV_PCM_FORMAT_U16_BE: return AFMT_U16_BE; case SNDRV_PCM_FORMAT_MPEG: return AFMT_MPEG; case SNDRV_PCM_FORMAT_S32_LE: return AFMT_S32_LE; case SNDRV_PCM_FORMAT_S32_BE: return AFMT_S32_BE; case SNDRV_PCM_FORMAT_S24_LE: return AFMT_S24_LE; case SNDRV_PCM_FORMAT_S24_BE: return AFMT_S24_BE; case SNDRV_PCM_FORMAT_S24_3LE: return AFMT_S24_PACKED; case SNDRV_PCM_FORMAT_FLOAT: return AFMT_FLOAT; case SNDRV_PCM_FORMAT_IEC958_SUBFRAME: return AFMT_SPDIF_RAW; default: return -EINVAL; } } static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *oss_params, struct snd_pcm_hw_params *slave_params) { ssize_t s; ssize_t oss_buffer_size; ssize_t oss_period_size, oss_periods; ssize_t min_period_size, max_period_size; struct snd_pcm_runtime *runtime = substream->runtime; size_t oss_frame_size; oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; oss_buffer_size = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL); if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = snd_pcm_plug_client_size(substream, oss_buffer_size * oss_frame_size); if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { if (oss_buffer_size > runtime->oss.mmap_bytes) oss_buffer_size = runtime->oss.mmap_bytes; } if (substream->oss.setup.period_size > 16) oss_period_size = substream->oss.setup.period_size; else if (runtime->oss.fragshift) { oss_period_size = 1 << runtime->oss.fragshift; if (oss_period_size > oss_buffer_size / 2) oss_period_size = oss_buffer_size / 2; } else { int sd; size_t bytes_per_sec = params_rate(oss_params) * snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; oss_period_size = oss_buffer_size; do { oss_period_size /= 2; } while (oss_period_size > bytes_per_sec); if (runtime->oss.subdivision == 0) { sd = 4; if (oss_period_size / sd > 4096) sd *= 2; if (oss_period_size / sd < 4096) sd = 1; } else sd = runtime->oss.subdivision; oss_period_size /= sd; if (oss_period_size < 16) oss_period_size = 16; } min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); if (min_period_size > 
0) { min_period_size *= oss_frame_size; min_period_size = roundup_pow_of_two(min_period_size); if (oss_period_size < min_period_size) oss_period_size = min_period_size; } max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); if (max_period_size > 0) { max_period_size *= oss_frame_size; max_period_size = rounddown_pow_of_two(max_period_size); if (oss_period_size > max_period_size) oss_period_size = max_period_size; } oss_periods = oss_buffer_size / oss_period_size; if (substream->oss.setup.periods > 1) oss_periods = substream->oss.setup.periods; s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags) s = runtime->oss.maxfrags; if (oss_periods > s) oss_periods = s; s = snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); if (s < 2) s = 2; if (oss_periods < s) oss_periods = s; while (oss_period_size * oss_periods > oss_buffer_size) oss_period_size /= 2; if (oss_period_size < 16) return -EINVAL; /* don't allocate too large period; 1MB period must be enough */ if (oss_period_size > 1024 * 1024) return -ENOMEM; runtime->oss.period_bytes = oss_period_size; runtime->oss.period_frames = 1; runtime->oss.periods = oss_periods; return 0; } static int choose_rate(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, unsigned int best_rate) { const struct snd_interval *it; struct snd_pcm_hw_params *save; unsigned int rate, prev; save = kmalloc(sizeof(*save), GFP_KERNEL); if (save == NULL) return -ENOMEM; *save = *params; it = hw_param_interval_c(save, SNDRV_PCM_HW_PARAM_RATE); /* try multiples of the best rate */ rate = best_rate; for (;;) { if (it->max < rate || (it->max == rate && it->openmax)) break; if (it->min < rate || (it->min == rate && !it->openmin)) { int ret; ret = snd_pcm_hw_param_set(substream, params, SNDRV_PCM_HW_PARAM_RATE, rate, 0); if (ret == (int)rate) { kfree(save); return rate; } *params = *save; } prev = rate; rate += best_rate; if (rate <= prev) break; } /* not found, use the nearest rate */ kfree(save); return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } /* parameter locking: returns immediately if tried during streaming */ static int lock_params(struct snd_pcm_runtime *runtime) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (atomic_read(&runtime->oss.rw_ref)) { mutex_unlock(&runtime->oss.params_lock); return -EBUSY; } return 0; } static void unlock_params(struct snd_pcm_runtime *runtime) { mutex_unlock(&runtime->oss.params_lock); } static void snd_pcm_oss_release_buffers(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; kvfree(runtime->oss.buffer); runtime->oss.buffer = NULL; #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); #endif } /* call with params_lock held */ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; struct snd_pcm_sw_params *sw_params; ssize_t oss_buffer_size, oss_period_size; size_t oss_frame_size; int err; int direct; snd_pcm_format_t format, sformat; int n; const struct snd_mask *sformat_mask; struct snd_mask mask; if (!runtime->oss.params) return 0; sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); sparams = kmalloc(sizeof(*sparams), 
GFP_KERNEL); if (!sw_params || !params || !sparams) { err = -ENOMEM; goto failure; } if (atomic_read(&substream->mmap_count)) direct = 1; else direct = substream->oss.setup.direct; _snd_pcm_hw_params_any(sparams); _snd_pcm_hw_param_setinteger(sparams, SNDRV_PCM_HW_PARAM_PERIODS); _snd_pcm_hw_param_min(sparams, SNDRV_PCM_HW_PARAM_PERIODS, 2, 0); snd_mask_none(&mask); if (atomic_read(&substream->mmap_count)) snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_MMAP_INTERLEAVED); else { snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_RW_INTERLEAVED); if (!direct) snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_RW_NONINTERLEAVED); } err = snd_pcm_hw_param_mask(substream, sparams, SNDRV_PCM_HW_PARAM_ACCESS, &mask); if (err < 0) { pcm_dbg(substream->pcm, "No usable accesses\n"); err = -EINVAL; goto failure; } err = choose_rate(substream, sparams, runtime->oss.rate); if (err < 0) goto failure; err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL); if (err < 0) goto failure; format = snd_pcm_oss_format_from(runtime->oss.format); sformat_mask = hw_param_mask_c(sparams, SNDRV_PCM_HW_PARAM_FORMAT); if (direct) sformat = format; else sformat = snd_pcm_plug_slave_format(format, sformat_mask); if ((__force int)sformat < 0 || !snd_mask_test_format(sformat_mask, sformat)) { pcm_for_each_format(sformat) { if (snd_mask_test_format(sformat_mask, sformat) && snd_pcm_oss_format_to(sformat) >= 0) goto format_found; } pcm_dbg(substream->pcm, "Cannot find a format!!!\n"); err = -EINVAL; goto failure; } format_found: err = _snd_pcm_hw_param_set(sparams, SNDRV_PCM_HW_PARAM_FORMAT, (__force int)sformat, 0); if (err < 0) goto failure; if (direct) { memcpy(params, sparams, sizeof(*params)); } else { _snd_pcm_hw_params_any(params); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_ACCESS, (__force int)SNDRV_PCM_ACCESS_RW_INTERLEAVED, 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_FORMAT, (__force int)snd_pcm_oss_format_from(runtime->oss.format), 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_RATE, runtime->oss.rate, 0); pdprintf("client: access = %i, format = %i, channels = %i, rate = %i\n", params_access(params), params_format(params), params_channels(params), params_rate(params)); } pdprintf("slave: access = %i, format = %i, channels = %i, rate = %i\n", params_access(sparams), params_format(sparams), params_channels(sparams), params_rate(sparams)); oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; err = snd_pcm_oss_period_size(substream, params, sparams); if (err < 0) goto failure; n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); if (err < 0) goto failure; err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, runtime->oss.periods, NULL); if (err < 0) goto failure; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); if (err < 0) { pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); goto failure; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { /* add necessary plugins */ err = snd_pcm_plug_format_plugins(substream, params, sparams); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_plug_format_plugins failed: %i\n", err); goto failure; } if 
(runtime->oss.plugin_first) { struct snd_pcm_plugin *plugin; err = snd_pcm_plugin_build_io(substream, sparams, &plugin); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_plugin_build_io failed: %i\n", err); goto failure; } if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_plugin_append(plugin); } else { err = snd_pcm_plugin_insert(plugin); } if (err < 0) goto failure; } } #endif if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { sw_params->start_threshold = runtime->boundary; } if (atomic_read(&substream->mmap_count) || substream->stream == SNDRV_PCM_STREAM_CAPTURE) sw_params->stop_threshold = runtime->boundary; else sw_params->stop_threshold = runtime->buffer_size; sw_params->tstamp_mode = SNDRV_PCM_TSTAMP_NONE; sw_params->period_step = 1; sw_params->avail_min = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? 1 : runtime->period_size; if (atomic_read(&substream->mmap_count) || substream->oss.setup.nosilence) { sw_params->silence_threshold = 0; sw_params->silence_size = 0; } else { snd_pcm_uframes_t frames; frames = runtime->period_size + 16; if (frames > runtime->buffer_size) frames = runtime->buffer_size; sw_params->silence_threshold = frames; sw_params->silence_size = frames; } err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_SW_PARAMS, sw_params); if (err < 0) { pcm_dbg(substream->pcm, "SW_PARAMS failed: %i\n", err); goto failure; } runtime->oss.periods = params_periods(sparams); oss_period_size = snd_pcm_plug_client_size(substream, params_period_size(sparams)); if (oss_period_size < 0) { err = -EINVAL; goto failure; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first) { err = snd_pcm_plug_alloc(substream, oss_period_size); if (err < 0) goto failure; } #endif oss_period_size = array_size(oss_period_size, oss_frame_size); oss_buffer_size = array_size(oss_period_size, runtime->oss.periods); if (oss_buffer_size <= 0) { err = -EINVAL; goto failure; } runtime->oss.period_bytes = oss_period_size; runtime->oss.buffer_bytes = oss_buffer_size; pdprintf("oss: period bytes = %i, buffer bytes = %i\n", runtime->oss.period_bytes, runtime->oss.buffer_bytes); pdprintf("slave: period_size = %i, buffer_size = %i\n", params_period_size(sparams), params_buffer_size(sparams)); runtime->oss.format = snd_pcm_oss_format_to(params_format(params)); runtime->oss.channels = params_channels(params); runtime->oss.rate = params_rate(params); kvfree(runtime->oss.buffer); runtime->oss.buffer = kvzalloc(runtime->oss.period_bytes, GFP_KERNEL); if (!runtime->oss.buffer) { err = -ENOMEM; goto failure; } runtime->oss.params = 0; runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; if (runtime->dma_area) snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes)); runtime->oss.period_frames = snd_pcm_alsa_frames(substream, oss_period_size); err = 0; failure: if (err) snd_pcm_oss_release_buffers(substream); kfree(sw_params); kfree(params); kfree(sparams); return err; } /* this one takes the lock by itself */ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, bool trylock) { struct snd_pcm_runtime *runtime = substream->runtime; int err; if (trylock) { if (!(mutex_trylock(&runtime->oss.params_lock))) return -EAGAIN; } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; err = snd_pcm_oss_change_params_locked(substream); mutex_unlock(&runtime->oss.params_lock); return err; } static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_file, struct 
snd_pcm_substream **r_substream) { int idx, err; struct snd_pcm_substream *asubstream = NULL, *substream; for (idx = 0; idx < 2; idx++) { substream = pcm_oss_file->streams[idx]; if (substream == NULL) continue; if (asubstream == NULL) asubstream = substream; if (substream->runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } } if (!asubstream) return -EIO; if (r_substream) *r_substream = asubstream; return 0; } /* call with params_lock held */ /* NOTE: this always call PREPARE unconditionally no matter whether * runtime->oss.prepare is set or not */ static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream) { int err; struct snd_pcm_runtime *runtime = substream->runtime; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_PREPARE, NULL); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_oss_prepare: SNDRV_PCM_IOCTL_PREPARE failed\n"); return err; } runtime->oss.prepare = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; runtime->oss.buffer_used = 0; return 0; } static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int err; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } if (runtime->oss.prepare) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; err = snd_pcm_oss_prepare(substream); mutex_unlock(&runtime->oss.params_lock); if (err < 0) return err; } return 0; } /* call with params_lock held */ static int snd_pcm_oss_make_ready_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int err; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params_locked(substream); if (err < 0) return err; } if (runtime->oss.prepare) { err = snd_pcm_oss_prepare(substream); if (err < 0) return err; } return 0; } static int snd_pcm_oss_capture_position_fixup(struct snd_pcm_substream *substream, snd_pcm_sframes_t *delay) { struct snd_pcm_runtime *runtime; snd_pcm_uframes_t frames; int err = 0; while (1) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, delay); if (err < 0) break; runtime = substream->runtime; if (*delay <= (snd_pcm_sframes_t)runtime->buffer_size) break; /* in case of overrun, skip whole periods like OSS/Linux driver does */ /* until avail(delay) <= buffer_size */ frames = (*delay - runtime->buffer_size) + runtime->period_size - 1; frames /= runtime->period_size; frames *= runtime->period_size; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_FORWARD, &frames); if (err < 0) break; } return err; } snd_pcm_sframes_t snd_pcm_oss_write3(struct snd_pcm_substream *substream, const char *ptr, snd_pcm_uframes_t frames, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: write: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? 
"XRUN" : "SUSPEND"); #endif ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } mutex_unlock(&runtime->oss.params_lock); ret = __snd_pcm_lib_xfer(substream, (void *)ptr, true, frames, in_kernel); mutex_lock(&runtime->oss.params_lock); if (ret != -EPIPE && ret != -ESTRPIPE) break; /* test, if we can't store new data, because the stream */ /* has not been started */ if (runtime->state == SNDRV_PCM_STATE_PREPARED) return -EAGAIN; } return ret; } snd_pcm_sframes_t snd_pcm_oss_read3(struct snd_pcm_substream *substream, char *ptr, snd_pcm_uframes_t frames, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t delay; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: read: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); if (ret < 0) break; } else if (runtime->state == SNDRV_PCM_STATE_SETUP) { ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_oss_capture_position_fixup(substream, &delay); if (ret < 0) break; mutex_unlock(&runtime->oss.params_lock); ret = __snd_pcm_lib_xfer(substream, (void *)ptr, true, frames, in_kernel); mutex_lock(&runtime->oss.params_lock); if (ret == -EPIPE) { if (runtime->state == SNDRV_PCM_STATE_DRAINING) { ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (ret < 0) break; } continue; } if (ret != -ESTRPIPE) break; } return ret; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_sframes_t snd_pcm_oss_writev3(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: writev: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_kernel_writev(substream, bufs, frames); if (ret != -EPIPE && ret != -ESTRPIPE) break; /* test, if we can't store new data, because the stream */ /* has not been started */ if (runtime->state == SNDRV_PCM_STATE_PREPARED) return -EAGAIN; } return ret; } snd_pcm_sframes_t snd_pcm_oss_readv3(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: readv: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? 
"XRUN" : "SUSPEND"); #endif ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); if (ret < 0) break; } else if (runtime->state == SNDRV_PCM_STATE_SETUP) { ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_kernel_readv(substream, bufs, frames); if (ret != -EPIPE && ret != -ESTRPIPE) break; } return ret; } #endif /* CONFIG_SND_PCM_OSS_PLUGINS */ static ssize_t snd_pcm_oss_write2(struct snd_pcm_substream *substream, const char *buf, size_t bytes, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t frames, frames1; #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first) { struct snd_pcm_plugin_channel *channels; size_t oss_frame_bytes = (runtime->oss.plugin_first->src_width * runtime->oss.plugin_first->src_format.channels) / 8; if (!in_kernel) { if (copy_from_user(runtime->oss.buffer, (const char __force __user *)buf, bytes)) return -EFAULT; buf = runtime->oss.buffer; } frames = bytes / oss_frame_bytes; frames1 = snd_pcm_plug_client_channels_buf(substream, (char *)buf, frames, &channels); if (frames1 < 0) return frames1; frames1 = snd_pcm_plug_write_transfer(substream, channels, frames1); if (frames1 <= 0) return frames1; bytes = frames1 * oss_frame_bytes; } else #endif { frames = bytes_to_frames(runtime, bytes); frames1 = snd_pcm_oss_write3(substream, buf, frames, in_kernel); if (frames1 <= 0) return frames1; bytes = frames_to_bytes(runtime, frames1); } return bytes; } static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const char __user *buf, size_t bytes) { size_t xfer = 0; ssize_t tmp = 0; struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return -ENXIO; atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } tmp = snd_pcm_oss_make_ready_locked(substream); if (tmp < 0) goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) tmp = runtime->oss.period_bytes - runtime->oss.buffer_used; if (tmp > 0) { if (copy_from_user(runtime->oss.buffer + runtime->oss.buffer_used, buf, tmp)) { tmp = -EFAULT; goto err; } } runtime->oss.buffer_used += tmp; buf += tmp; bytes -= tmp; xfer += tmp; if (substream->oss.setup.partialfrag || runtime->oss.buffer_used == runtime->oss.period_bytes) { tmp = snd_pcm_oss_write2(substream, runtime->oss.buffer + runtime->oss.period_ptr, runtime->oss.buffer_used - runtime->oss.period_ptr, 1); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; runtime->oss.period_ptr += tmp; runtime->oss.period_ptr %= runtime->oss.period_bytes; if (runtime->oss.period_ptr == 0 || runtime->oss.period_ptr == runtime->oss.buffer_used) runtime->oss.buffer_used = 0; else if ((substream->f_flags & O_NONBLOCK) != 0) { tmp = -EAGAIN; goto err; } } } else { tmp = snd_pcm_oss_write2(substream, (const char __force *)buf, runtime->oss.period_bytes, 0); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; buf += tmp; bytes -= tmp; xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) tmp = -EAGAIN; } err: mutex_unlock(&runtime->oss.params_lock); if (tmp < 0) break; if (signal_pending(current)) { tmp = -ERESTARTSYS; break; } tmp = 0; } atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : tmp; } static ssize_t snd_pcm_oss_read2(struct snd_pcm_substream *substream, char *buf, size_t bytes, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t frames, frames1; #ifdef CONFIG_SND_PCM_OSS_PLUGINS char __user *final_dst = (char __force __user *)buf; if (runtime->oss.plugin_first) { struct snd_pcm_plugin_channel *channels; size_t oss_frame_bytes = (runtime->oss.plugin_last->dst_width * runtime->oss.plugin_last->dst_format.channels) / 8; if (!in_kernel) buf = runtime->oss.buffer; frames = bytes / oss_frame_bytes; frames1 = snd_pcm_plug_client_channels_buf(substream, buf, frames, &channels); if (frames1 < 0) return frames1; frames1 = snd_pcm_plug_read_transfer(substream, channels, frames1); if (frames1 <= 0) return frames1; bytes = frames1 * oss_frame_bytes; if (!in_kernel && copy_to_user(final_dst, buf, bytes)) return -EFAULT; } else #endif { frames = bytes_to_frames(runtime, bytes); frames1 = snd_pcm_oss_read3(substream, buf, frames, in_kernel); if (frames1 <= 0) return frames1; bytes = frames_to_bytes(runtime, frames1); } return bytes; } static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __user *buf, size_t bytes) { size_t xfer = 0; ssize_t tmp = 0; struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return -ENXIO; atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } tmp = snd_pcm_oss_make_ready_locked(substream); if (tmp < 0) goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; runtime->oss.period_ptr = tmp; runtime->oss.buffer_used = tmp; } tmp = bytes; if ((size_t) tmp > runtime->oss.buffer_used) tmp = runtime->oss.buffer_used; if (copy_to_user(buf, runtime->oss.buffer + (runtime->oss.period_ptr - runtime->oss.buffer_used), tmp)) { tmp = -EFAULT; goto err; } buf += tmp; bytes -= tmp; xfer += tmp; runtime->oss.buffer_used -= tmp; } else { tmp = snd_pcm_oss_read2(substream, (char __force *)buf, runtime->oss.period_bytes, 0); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; buf += tmp; bytes -= tmp; xfer += tmp; } err: mutex_unlock(&runtime->oss.params_lock); if (tmp < 0) break; if (signal_pending(current)) { tmp = -ERESTARTSYS; break; } tmp = 0; } atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : tmp; } static int snd_pcm_oss_reset(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; int i; for (i = 0; i < 2; i++) { substream = pcm_oss_file->streams[i]; if (!substream) continue; runtime = substream->runtime; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; mutex_unlock(&runtime->oss.params_lock); } return 0; } static int snd_pcm_oss_post(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream != NULL) { err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_START, NULL); } /* note: all errors from the start action are ignored */ /* OSS apps do not know, how to handle them */ return 0; } static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size) { struct snd_pcm_runtime *runtime; ssize_t result = 0; snd_pcm_state_t state; long res; wait_queue_entry_t wait; runtime = substream->runtime; init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync1: size = %li\n", size); #endif while (1) { result = snd_pcm_oss_write2(substream, runtime->oss.buffer, size, 1); if (result > 0) { runtime->oss.buffer_used = 0; result = 0; break; } if (result != 0 && result != -EAGAIN) break; result = 0; set_current_state(TASK_INTERRUPTIBLE); snd_pcm_stream_lock_irq(substream); state = runtime->state; snd_pcm_stream_unlock_irq(substream); if (state != SNDRV_PCM_STATE_RUNNING) { set_current_state(TASK_RUNNING); break; } res = schedule_timeout(10 * HZ); if (signal_pending(current)) { result = -ERESTARTSYS; break; } if (res == 0) { pcm_err(substream->pcm, "OSS sync error - DMA timeout\n"); result = -EIO; break; } } remove_wait_queue(&runtime->sleep, &wait); return result; } static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) { int err = 0; unsigned int saved_f_flags; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_format_t format; unsigned long width; size_t size; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream != NULL) { runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) goto __direct; atomic_inc(&runtime->oss.rw_ref); if (mutex_lock_interruptible(&runtime->oss.params_lock)) { atomic_dec(&runtime->oss.rw_ref); return -ERESTARTSYS; } err = snd_pcm_oss_make_ready_locked(substream); if (err < 0) goto unlock; format = snd_pcm_oss_format_from(runtime->oss.format); width = snd_pcm_format_physical_width(format); if (runtime->oss.buffer_used > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: buffer_used\n"); #endif size = (8 * (runtime->oss.period_bytes - runtime->oss.buffer_used) + 7) / width; snd_pcm_format_set_silence(format, runtime->oss.buffer + runtime->oss.buffer_used, size); err = snd_pcm_oss_sync1(substream, runtime->oss.period_bytes); if (err < 0) goto unlock; } else if (runtime->oss.period_ptr > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: period_ptr\n"); #endif size = runtime->oss.period_bytes - runtime->oss.period_ptr; snd_pcm_format_set_silence(format, runtime->oss.buffer, size * 8 / width); err = snd_pcm_oss_sync1(substream, size); if (err < 0) goto unlock; } /* * The ALSA's period might be a bit large 
than OSS one. * Fill the remain portion of ALSA period with zeros. */ size = runtime->control->appl_ptr % runtime->period_size; if (size > 0) { size = runtime->period_size - size; if (runtime->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) snd_pcm_lib_write(substream, NULL, size); else if (runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) snd_pcm_lib_writev(substream, NULL, size); } unlock: mutex_unlock(&runtime->oss.params_lock); atomic_dec(&runtime->oss.rw_ref); if (err < 0) return err; /* * finish sync: drain the buffer */ __direct: saved_f_flags = substream->f_flags; substream->f_flags &= ~O_NONBLOCK; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); substream->f_flags = saved_f_flags; if (err < 0) return err; mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; mutex_unlock(&runtime->oss.params_lock); } substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (substream != NULL) { err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (err < 0) return err; mutex_lock(&runtime->oss.params_lock); runtime->oss.buffer_used = 0; runtime->oss.prepare = 1; mutex_unlock(&runtime->oss.params_lock); } return 0; } static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) { int idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; int err; if (substream == NULL) continue; runtime = substream->runtime; if (rate < 1000) rate = 1000; else if (rate > 192000) rate = 192000; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.rate != rate) { runtime->oss.params = 1; runtime->oss.rate = rate; } unlock_params(runtime); } return snd_pcm_oss_get_rate(pcm_oss_file); } static int snd_pcm_oss_get_rate(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.rate; } static int snd_pcm_oss_set_channels(struct snd_pcm_oss_file *pcm_oss_file, unsigned int channels) { int idx; if (channels < 1) channels = 1; if (channels > 128) return -EINVAL; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; int err; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.channels != channels) { runtime->oss.params = 1; runtime->oss.channels = channels; } unlock_params(runtime); } return snd_pcm_oss_get_channels(pcm_oss_file); } static int snd_pcm_oss_get_channels(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.channels; } static int snd_pcm_oss_get_block_size(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.period_bytes; } static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; int direct; struct snd_pcm_hw_params *params; unsigned int formats = 0; const struct snd_mask *format_mask; int fmt; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return 
err; if (atomic_read(&substream->mmap_count)) direct = 1; else direct = substream->oss.setup.direct; if (!direct) return AFMT_MU_LAW | AFMT_U8 | AFMT_S16_LE | AFMT_S16_BE | AFMT_S8 | AFMT_U16_LE | AFMT_U16_BE | AFMT_S32_LE | AFMT_S32_BE | AFMT_S24_LE | AFMT_S24_BE | AFMT_S24_PACKED; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; _snd_pcm_hw_params_any(params); err = snd_pcm_hw_refine(substream, params); if (err < 0) goto error; format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); for (fmt = 0; fmt < 32; ++fmt) { if (snd_mask_test(format_mask, fmt)) { int f = snd_pcm_oss_format_to((__force snd_pcm_format_t)fmt); if (f >= 0) formats |= f; } } error: kfree(params); return err < 0 ? err : formats; } static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) { int formats, idx; int err; if (format != AFMT_QUERY) { formats = snd_pcm_oss_get_formats(pcm_oss_file); if (formats < 0) return formats; if (!(formats & format)) format = AFMT_U8; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.format != format) { runtime->oss.params = 1; runtime->oss.format = format; } unlock_params(runtime); } } return snd_pcm_oss_get_format(pcm_oss_file); } static int snd_pcm_oss_get_format(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.format; } static int snd_pcm_oss_set_subdivide1(struct snd_pcm_substream *substream, int subdivide) { struct snd_pcm_runtime *runtime; runtime = substream->runtime; if (subdivide == 0) { subdivide = runtime->oss.subdivision; if (subdivide == 0) subdivide = 1; return subdivide; } if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; if (subdivide != 1 && subdivide != 2 && subdivide != 4 && subdivide != 8 && subdivide != 16) return -EINVAL; runtime->oss.subdivision = subdivide; runtime->oss.params = 1; return subdivide; } static int snd_pcm_oss_set_subdivide(struct snd_pcm_oss_file *pcm_oss_file, int subdivide) { int err = -EINVAL, idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; err = snd_pcm_oss_set_subdivide1(substream, subdivide); unlock_params(runtime); if (err < 0) return err; } return err; } static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsigned int val) { struct snd_pcm_runtime *runtime; int fragshift; runtime = substream->runtime; if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; fragshift = val & 0xffff; if (fragshift >= 25) /* should be large enough */ return -EINVAL; runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; if (runtime->oss.fragshift < 4) /* < 16 */ runtime->oss.fragshift = 4; if (runtime->oss.maxfrags < 2) runtime->oss.maxfrags = 2; runtime->oss.params = 1; return 0; } static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsigned int val) { int err = -EINVAL, idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream 
== NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; err = snd_pcm_oss_set_fragment1(substream, val); unlock_params(runtime); if (err < 0) return err; } return err; } static int snd_pcm_oss_nonblock(struct file * file) { spin_lock(&file->f_lock); file->f_flags |= O_NONBLOCK; spin_unlock(&file->f_lock); return 0; } static int snd_pcm_oss_get_caps1(struct snd_pcm_substream *substream, int res) { if (substream == NULL) { res &= ~DSP_CAP_DUPLEX; return res; } #ifdef DSP_CAP_MULTI if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) if (substream->pstr->substream_count > 1) res |= DSP_CAP_MULTI; #endif /* DSP_CAP_REALTIME is set all times: */ /* all ALSA drivers can return actual pointer in ring buffer */ #if defined(DSP_CAP_REALTIME) && 0 { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->info & (SNDRV_PCM_INFO_BLOCK_TRANSFER|SNDRV_PCM_INFO_BATCH)) res &= ~DSP_CAP_REALTIME; } #endif return res; } static int snd_pcm_oss_get_caps(struct snd_pcm_oss_file *pcm_oss_file) { int result, idx; result = DSP_CAP_TRIGGER | DSP_CAP_MMAP | DSP_CAP_DUPLEX | DSP_CAP_REALTIME; for (idx = 0; idx < 2; idx++) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; result = snd_pcm_oss_get_caps1(substream, result); } result |= 0x0001; /* revision - same as SB AWE 64 */ return result; } static void snd_pcm_oss_simulate_fill(struct snd_pcm_substream *substream, snd_pcm_uframes_t hw_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t appl_ptr; appl_ptr = hw_ptr + runtime->buffer_size; appl_ptr %= runtime->boundary; runtime->control->appl_ptr = appl_ptr; } static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int trigger) { struct snd_pcm_runtime *runtime; struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; int err, cmd; #ifdef OSS_DEBUG pr_debug("pcm_oss: trigger = 0x%x\n", trigger); #endif psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (psubstream) { err = snd_pcm_oss_make_ready(psubstream); if (err < 0) return err; } if (csubstream) { err = snd_pcm_oss_make_ready(csubstream); if (err < 0) return err; } if (psubstream) { runtime = psubstream->runtime; cmd = 0; if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (trigger & PCM_ENABLE_OUTPUT) { if (runtime->oss.trigger) goto _skip1; if (atomic_read(&psubstream->mmap_count)) snd_pcm_oss_simulate_fill(psubstream, get_hw_ptr_period(runtime)); runtime->oss.trigger = 1; runtime->start_threshold = 1; cmd = SNDRV_PCM_IOCTL_START; } else { if (!runtime->oss.trigger) goto _skip1; runtime->oss.trigger = 0; runtime->start_threshold = runtime->boundary; cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } _skip1: mutex_unlock(&runtime->oss.params_lock); if (cmd) { err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); if (err < 0) return err; } } if (csubstream) { runtime = csubstream->runtime; cmd = 0; if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (trigger & PCM_ENABLE_INPUT) { if (runtime->oss.trigger) goto _skip2; runtime->oss.trigger = 1; runtime->start_threshold = 1; cmd = SNDRV_PCM_IOCTL_START; } else { if (!runtime->oss.trigger) goto _skip2; runtime->oss.trigger = 0; runtime->start_threshold = runtime->boundary; cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } _skip2: mutex_unlock(&runtime->oss.params_lock); if (cmd) { err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); if (err < 0) return 
err; } } return 0; } static int snd_pcm_oss_get_trigger(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; int result = 0; psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (psubstream && psubstream->runtime && psubstream->runtime->oss.trigger) result |= PCM_ENABLE_OUTPUT; if (csubstream && csubstream->runtime && csubstream->runtime->oss.trigger) result |= PCM_ENABLE_INPUT; return result; } static int snd_pcm_oss_get_odelay(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t delay; int err; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) return -EINVAL; err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; if (runtime->oss.params || runtime->oss.prepare) return 0; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &delay); if (err == -EPIPE) delay = 0; /* hack for broken OSS applications */ else if (err < 0) return err; return snd_pcm_oss_bytes(substream, delay); } static int snd_pcm_oss_get_ptr(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct count_info __user * _info) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t delay; int fixup; struct count_info info; int err; if (_info == NULL) return -EFAULT; substream = pcm_oss_file->streams[stream]; if (substream == NULL) return -EINVAL; err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; if (runtime->oss.params || runtime->oss.prepare) { memset(&info, 0, sizeof(info)); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } if (stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &delay); if (err == -EPIPE || err == -ESTRPIPE || (! 
err && delay < 0)) { err = 0; delay = 0; fixup = 0; } else { fixup = runtime->oss.buffer_used; } } else { err = snd_pcm_oss_capture_position_fixup(substream, &delay); fixup = -runtime->oss.buffer_used; } if (err < 0) return err; info.ptr = snd_pcm_oss_bytes(substream, runtime->status->hw_ptr % runtime->buffer_size); if (atomic_read(&substream->mmap_count)) { snd_pcm_sframes_t n; delay = get_hw_ptr_period(runtime); n = delay - runtime->oss.prev_hw_ptr_period; if (n < 0) n += runtime->boundary; info.blocks = n / runtime->period_size; runtime->oss.prev_hw_ptr_period = delay; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) snd_pcm_oss_simulate_fill(substream, delay); info.bytes = snd_pcm_oss_bytes(substream, runtime->status->hw_ptr) & INT_MAX; } else { delay = snd_pcm_oss_bytes(substream, delay); if (stream == SNDRV_PCM_STREAM_PLAYBACK) { if (substream->oss.setup.buggyptr) info.blocks = (runtime->oss.buffer_bytes - delay - fixup) / runtime->oss.period_bytes; else info.blocks = (delay + fixup) / runtime->oss.period_bytes; info.bytes = (runtime->oss.bytes - delay) & INT_MAX; } else { delay += fixup; info.blocks = delay / runtime->oss.period_bytes; info.bytes = (runtime->oss.bytes + delay) & INT_MAX; } } if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct audio_buf_info __user *_info) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t avail; int fixup; struct audio_buf_info info; int err; if (_info == NULL) return -EFAULT; substream = pcm_oss_file->streams[stream]; if (substream == NULL) return -EINVAL; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } info.fragsize = runtime->oss.period_bytes; info.fragstotal = runtime->periods; if (runtime->oss.prepare) { if (stream == SNDRV_PCM_STREAM_PLAYBACK) { info.bytes = runtime->oss.period_bytes * runtime->oss.periods; info.fragments = runtime->oss.periods; } else { info.bytes = 0; info.fragments = 0; } } else { if (stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &avail); if (err == -EPIPE || err == -ESTRPIPE || (! 
err && avail < 0)) { avail = runtime->buffer_size; err = 0; fixup = 0; } else { avail = runtime->buffer_size - avail; fixup = -runtime->oss.buffer_used; } } else { err = snd_pcm_oss_capture_position_fixup(substream, &avail); fixup = runtime->oss.buffer_used; } if (err < 0) return err; info.bytes = snd_pcm_oss_bytes(substream, avail) + fixup; info.fragments = info.bytes / runtime->oss.period_bytes; } #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: space: bytes = %i, fragments = %i, fragstotal = %i, fragsize = %i\n", info.bytes, info.fragments, info.fragstotal, info.fragsize); #endif if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_pcm_oss_get_mapbuf(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct buffmem_desc __user * _info) { // it won't be probably implemented // pr_debug("TODO: snd_pcm_oss_get_mapbuf\n"); return -EINVAL; } static const char *strip_task_path(const char *path) { const char *ptr, *ptrl = NULL; for (ptr = path; *ptr; ptr++) { if (*ptr == '/') ptrl = ptr + 1; } return ptrl; } static void snd_pcm_oss_look_for_setup(struct snd_pcm *pcm, int stream, const char *task_name, struct snd_pcm_oss_setup *rsetup) { struct snd_pcm_oss_setup *setup; mutex_lock(&pcm->streams[stream].oss.setup_mutex); do { for (setup = pcm->streams[stream].oss.setup_list; setup; setup = setup->next) { if (!strcmp(setup->task_name, task_name)) goto out; } } while ((task_name = strip_task_path(task_name)) != NULL); out: if (setup) *rsetup = *setup; mutex_unlock(&pcm->streams[stream].oss.setup_mutex); } static void snd_pcm_oss_release_substream(struct snd_pcm_substream *substream) { snd_pcm_oss_release_buffers(substream); substream->oss.oss = 0; } static void snd_pcm_oss_init_substream(struct snd_pcm_substream *substream, struct snd_pcm_oss_setup *setup, int minor) { struct snd_pcm_runtime *runtime; substream->oss.oss = 1; substream->oss.setup = *setup; if (setup->nonblock) substream->f_flags |= O_NONBLOCK; else if (setup->block) substream->f_flags &= ~O_NONBLOCK; runtime = substream->runtime; runtime->oss.params = 1; runtime->oss.trigger = 1; runtime->oss.rate = 8000; mutex_init(&runtime->oss.params_lock); switch (SNDRV_MINOR_OSS_DEVICE(minor)) { case SNDRV_MINOR_OSS_PCM_8: runtime->oss.format = AFMT_U8; break; case SNDRV_MINOR_OSS_PCM_16: runtime->oss.format = AFMT_S16_LE; break; default: runtime->oss.format = AFMT_MU_LAW; } runtime->oss.channels = 1; runtime->oss.fragshift = 0; runtime->oss.maxfrags = 0; runtime->oss.subdivision = 0; substream->pcm_release = snd_pcm_oss_release_substream; atomic_set(&runtime->oss.rw_ref, 0); } static int snd_pcm_oss_release_file(struct snd_pcm_oss_file *pcm_oss_file) { int cidx; if (!pcm_oss_file) return 0; for (cidx = 0; cidx < 2; ++cidx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[cidx]; if (substream) snd_pcm_release_substream(substream); } kfree(pcm_oss_file); return 0; } static int snd_pcm_oss_open_file(struct file *file, struct snd_pcm *pcm, struct snd_pcm_oss_file **rpcm_oss_file, int minor, struct snd_pcm_oss_setup *setup) { int idx, err; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; fmode_t f_mode = file->f_mode; if (rpcm_oss_file) *rpcm_oss_file = NULL; pcm_oss_file = kzalloc(sizeof(*pcm_oss_file), GFP_KERNEL); if (pcm_oss_file == NULL) return -ENOMEM; if ((f_mode & (FMODE_WRITE|FMODE_READ)) == (FMODE_WRITE|FMODE_READ) && (pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX)) f_mode = FMODE_WRITE; file->f_flags &= ~O_APPEND; for (idx = 0; idx < 2; idx++) { if 
(setup[idx].disable) continue; if (! pcm->streams[idx].substream_count) continue; /* no matching substream */ if (idx == SNDRV_PCM_STREAM_PLAYBACK) { if (! (f_mode & FMODE_WRITE)) continue; } else { if (! (f_mode & FMODE_READ)) continue; } err = snd_pcm_open_substream(pcm, idx, file, &substream); if (err < 0) { snd_pcm_oss_release_file(pcm_oss_file); return err; } pcm_oss_file->streams[idx] = substream; snd_pcm_oss_init_substream(substream, &setup[idx], minor); } if (!pcm_oss_file->streams[0] && !pcm_oss_file->streams[1]) { snd_pcm_oss_release_file(pcm_oss_file); return -EINVAL; } file->private_data = pcm_oss_file; if (rpcm_oss_file) *rpcm_oss_file = pcm_oss_file; return 0; } static int snd_task_name(struct task_struct *task, char *name, size_t size) { unsigned int idx; if (snd_BUG_ON(!task || !name || size < 2)) return -EINVAL; for (idx = 0; idx < sizeof(task->comm) && idx + 1 < size; idx++) name[idx] = task->comm[idx]; name[idx] = '\0'; return 0; } static int snd_pcm_oss_open(struct inode *inode, struct file *file) { int err; char task_name[32]; struct snd_pcm *pcm; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_oss_setup setup[2]; int nonblock; wait_queue_entry_t wait; err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_PCM); if (pcm == NULL) { err = -ENODEV; goto __error1; } err = snd_card_file_add(pcm->card, file); if (err < 0) goto __error1; if (!try_module_get(pcm->card->module)) { err = -EFAULT; goto __error2; } if (snd_task_name(current, task_name, sizeof(task_name)) < 0) { err = -EFAULT; goto __error; } memset(setup, 0, sizeof(setup)); if (file->f_mode & FMODE_WRITE) snd_pcm_oss_look_for_setup(pcm, SNDRV_PCM_STREAM_PLAYBACK, task_name, &setup[0]); if (file->f_mode & FMODE_READ) snd_pcm_oss_look_for_setup(pcm, SNDRV_PCM_STREAM_CAPTURE, task_name, &setup[1]); nonblock = !!(file->f_flags & O_NONBLOCK); if (!nonblock) nonblock = nonblock_open; init_waitqueue_entry(&wait, current); add_wait_queue(&pcm->open_wait, &wait); mutex_lock(&pcm->open_mutex); while (1) { err = snd_pcm_oss_open_file(file, pcm, &pcm_oss_file, iminor(inode), setup); if (err >= 0) break; if (err == -EAGAIN) { if (nonblock) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&pcm->open_mutex); schedule(); mutex_lock(&pcm->open_mutex); if (pcm->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&pcm->open_wait, &wait); mutex_unlock(&pcm->open_mutex); if (err < 0) goto __error; snd_card_unref(pcm->card); return err; __error: module_put(pcm->card->module); __error2: snd_card_file_remove(pcm->card, file); __error1: if (pcm) snd_card_unref(pcm->card); return err; } static int snd_pcm_oss_release(struct inode *inode, struct file *file) { struct snd_pcm *pcm; struct snd_pcm_substream *substream; struct snd_pcm_oss_file *pcm_oss_file; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (snd_BUG_ON(!substream)) return -ENXIO; pcm = substream->pcm; if (!pcm->card->shutdown) snd_pcm_oss_sync(pcm_oss_file); mutex_lock(&pcm->open_mutex); snd_pcm_oss_release_file(pcm_oss_file); mutex_unlock(&pcm->open_mutex); wake_up(&pcm->open_wait); module_put(pcm->card->module); snd_card_file_remove(pcm->card, file); return 0; } static long snd_pcm_oss_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { 
struct snd_pcm_oss_file *pcm_oss_file; int __user *p = (int __user *)arg; int res; pcm_oss_file = file->private_data; if (cmd == OSS_GETVERSION) return put_user(SNDRV_OSS_VERSION, p); if (cmd == OSS_ALSAEMULVER) return put_user(1, p); #if IS_REACHABLE(CONFIG_SND_MIXER_OSS) if (((cmd >> 8) & 0xff) == 'M') { /* mixer ioctl - for OSS compatibility */ struct snd_pcm_substream *substream; int idx; for (idx = 0; idx < 2; ++idx) { substream = pcm_oss_file->streams[idx]; if (substream != NULL) break; } if (snd_BUG_ON(idx >= 2)) return -ENXIO; return snd_mixer_oss_ioctl_card(substream->pcm->card, cmd, arg); } #endif if (((cmd >> 8) & 0xff) != 'P') return -EINVAL; #ifdef OSS_DEBUG pr_debug("pcm_oss: ioctl = 0x%x\n", cmd); #endif switch (cmd) { case SNDCTL_DSP_RESET: return snd_pcm_oss_reset(pcm_oss_file); case SNDCTL_DSP_SYNC: return snd_pcm_oss_sync(pcm_oss_file); case SNDCTL_DSP_SPEED: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_rate(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_RATE: res = snd_pcm_oss_get_rate(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_STEREO: if (get_user(res, p)) return -EFAULT; res = res > 0 ? 2 : 1; res = snd_pcm_oss_set_channels(pcm_oss_file, res); if (res < 0) return res; return put_user(--res, p); case SNDCTL_DSP_GETBLKSIZE: res = snd_pcm_oss_get_block_size(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETFMT: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_format(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_BITS: res = snd_pcm_oss_get_format(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_CHANNELS: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_channels(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_CHANNELS: res = snd_pcm_oss_get_channels(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_WRITE_FILTER: case SOUND_PCM_READ_FILTER: return -EIO; case SNDCTL_DSP_POST: return snd_pcm_oss_post(pcm_oss_file); case SNDCTL_DSP_SUBDIVIDE: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_subdivide(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETFRAGMENT: if (get_user(res, p)) return -EFAULT; return snd_pcm_oss_set_fragment(pcm_oss_file, res); case SNDCTL_DSP_GETFMTS: res = snd_pcm_oss_get_formats(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_GETOSPACE: case SNDCTL_DSP_GETISPACE: return snd_pcm_oss_get_space(pcm_oss_file, cmd == SNDCTL_DSP_GETISPACE ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct audio_buf_info __user *) arg); case SNDCTL_DSP_NONBLOCK: return snd_pcm_oss_nonblock(file); case SNDCTL_DSP_GETCAPS: res = snd_pcm_oss_get_caps(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_GETTRIGGER: res = snd_pcm_oss_get_trigger(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETTRIGGER: if (get_user(res, p)) return -EFAULT; return snd_pcm_oss_set_trigger(pcm_oss_file, res); case SNDCTL_DSP_GETIPTR: case SNDCTL_DSP_GETOPTR: return snd_pcm_oss_get_ptr(pcm_oss_file, cmd == SNDCTL_DSP_GETIPTR ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct count_info __user *) arg); case SNDCTL_DSP_MAPINBUF: case SNDCTL_DSP_MAPOUTBUF: return snd_pcm_oss_get_mapbuf(pcm_oss_file, cmd == SNDCTL_DSP_MAPINBUF ? 
SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct buffmem_desc __user *) arg); case SNDCTL_DSP_SETSYNCRO: /* stop DMA now.. */ return 0; case SNDCTL_DSP_SETDUPLEX: if (snd_pcm_oss_get_caps(pcm_oss_file) & DSP_CAP_DUPLEX) return 0; return -EIO; case SNDCTL_DSP_GETODELAY: res = snd_pcm_oss_get_odelay(pcm_oss_file); if (res < 0) { /* it's for sure, some broken apps don't check for error codes */ put_user(0, p); return res; } return put_user(res, p); case SNDCTL_DSP_PROFILE: return 0; /* silently ignore */ default: pr_debug("pcm_oss: unknown command = 0x%x\n", cmd); } return -EINVAL; } #ifdef CONFIG_COMPAT /* all compatible */ static long snd_pcm_oss_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { /* * Everything is compatbile except SNDCTL_DSP_MAPINBUF/SNDCTL_DSP_MAPOUTBUF, * which are not implemented for the native case either */ return snd_pcm_oss_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } #else #define snd_pcm_oss_ioctl_compat NULL #endif static ssize_t snd_pcm_oss_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (substream == NULL) return -ENXIO; substream->f_flags = file->f_flags & O_NONBLOCK; #ifndef OSS_DEBUG return snd_pcm_oss_read1(substream, buf, count); #else { ssize_t res = snd_pcm_oss_read1(substream, buf, count); pcm_dbg(substream->pcm, "pcm_oss: read %li bytes (returned %li bytes)\n", (long)count, (long)res); return res; } #endif } static ssize_t snd_pcm_oss_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; long result; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) return -ENXIO; substream->f_flags = file->f_flags & O_NONBLOCK; result = snd_pcm_oss_write1(substream, buf, count); #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: write %li bytes (wrote %li bytes)\n", (long)count, (long)result); #endif return result; } static int snd_pcm_oss_playback_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return runtime->oss.prev_hw_ptr_period != get_hw_ptr_period(runtime); else return snd_pcm_playback_avail(runtime) >= runtime->oss.period_frames; } static int snd_pcm_oss_capture_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return runtime->oss.prev_hw_ptr_period != get_hw_ptr_period(runtime); else return snd_pcm_capture_avail(runtime) >= runtime->oss.period_frames; } static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) { struct snd_pcm_oss_file *pcm_oss_file; __poll_t mask; struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; pcm_oss_file = file->private_data; psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; mask = 0; if (psubstream != NULL) { struct snd_pcm_runtime *runtime = psubstream->runtime; poll_wait(file, &runtime->sleep, wait); snd_pcm_stream_lock_irq(psubstream); if (runtime->state != SNDRV_PCM_STATE_DRAINING && (runtime->state != SNDRV_PCM_STATE_RUNNING || snd_pcm_oss_playback_ready(psubstream))) mask |= EPOLLOUT | EPOLLWRNORM; snd_pcm_stream_unlock_irq(psubstream); } if 
(csubstream != NULL) { struct snd_pcm_runtime *runtime = csubstream->runtime; snd_pcm_state_t ostate; poll_wait(file, &runtime->sleep, wait); snd_pcm_stream_lock_irq(csubstream); ostate = runtime->state; if (ostate != SNDRV_PCM_STATE_RUNNING || snd_pcm_oss_capture_ready(csubstream)) mask |= EPOLLIN | EPOLLRDNORM; snd_pcm_stream_unlock_irq(csubstream); if (ostate != SNDRV_PCM_STATE_RUNNING && runtime->oss.trigger) { struct snd_pcm_oss_file ofile; memset(&ofile, 0, sizeof(ofile)); ofile.streams[SNDRV_PCM_STREAM_CAPTURE] = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; runtime->oss.trigger = 0; snd_pcm_oss_set_trigger(&ofile, PCM_ENABLE_INPUT); } } return mask; } static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream = NULL; struct snd_pcm_runtime *runtime; int err; #ifdef OSS_DEBUG pr_debug("pcm_oss: mmap begin\n"); #endif pcm_oss_file = file->private_data; switch ((area->vm_flags & (VM_READ | VM_WRITE))) { case VM_READ | VM_WRITE: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream) break; fallthrough; case VM_READ: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; break; case VM_WRITE: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; break; default: return -EINVAL; } /* set VM_READ access as well to fix memset() routines that do reads before writes (to improve performance) */ vm_flags_set(area, VM_READ); if (substream == NULL) return -ENXIO; runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_MMAP_VALID)) return -EIO; if (runtime->info & SNDRV_PCM_INFO_INTERLEAVED) runtime->access = SNDRV_PCM_ACCESS_MMAP_INTERLEAVED; else return -EIO; if (runtime->oss.params) { /* use mutex_trylock() for params_lock for avoiding a deadlock * between mmap_lock and params_lock taken by * copy_from/to_user() in snd_pcm_oss_write/read() */ err = snd_pcm_oss_change_params(substream, true); if (err < 0) return err; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first != NULL) return -EIO; #endif if (area->vm_pgoff != 0) return -EINVAL; err = snd_pcm_mmap_data(substream, file, area); if (err < 0) return err; runtime->oss.mmap_bytes = area->vm_end - area->vm_start; runtime->silence_threshold = 0; runtime->silence_size = 0; #ifdef OSS_DEBUG pr_debug("pcm_oss: mmap ok, bytes = 0x%x\n", runtime->oss.mmap_bytes); #endif /* In mmap mode we never stop */ runtime->stop_threshold = runtime->boundary; return 0; } #ifdef CONFIG_SND_VERBOSE_PROCFS /* * /proc interface */ static void snd_pcm_oss_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; struct snd_pcm_oss_setup *setup = pstr->oss.setup_list; mutex_lock(&pstr->oss.setup_mutex); while (setup) { snd_iprintf(buffer, "%s %u %u%s%s%s%s%s%s\n", setup->task_name, setup->periods, setup->period_size, setup->disable ? " disable" : "", setup->direct ? " direct" : "", setup->block ? " block" : "", setup->nonblock ? " non-block" : "", setup->partialfrag ? " partial-frag" : "", setup->nosilence ? 
" no-silence" : ""); setup = setup->next; } mutex_unlock(&pstr->oss.setup_mutex); } static void snd_pcm_oss_proc_free_setup_list(struct snd_pcm_str * pstr) { struct snd_pcm_oss_setup *setup, *setupn; for (setup = pstr->oss.setup_list, pstr->oss.setup_list = NULL; setup; setup = setupn) { setupn = setup->next; kfree(setup->task_name); kfree(setup); } pstr->oss.setup_list = NULL; } static void snd_pcm_oss_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; char line[128], str[32], task_name[32]; const char *ptr; int idx1; struct snd_pcm_oss_setup *setup, *setup1, template; while (!snd_info_get_line(buffer, line, sizeof(line))) { mutex_lock(&pstr->oss.setup_mutex); memset(&template, 0, sizeof(template)); ptr = snd_info_get_str(task_name, line, sizeof(task_name)); if (!strcmp(task_name, "clear") || !strcmp(task_name, "erase")) { snd_pcm_oss_proc_free_setup_list(pstr); mutex_unlock(&pstr->oss.setup_mutex); continue; } for (setup = pstr->oss.setup_list; setup; setup = setup->next) { if (!strcmp(setup->task_name, task_name)) { template = *setup; break; } } ptr = snd_info_get_str(str, ptr, sizeof(str)); template.periods = simple_strtoul(str, NULL, 10); ptr = snd_info_get_str(str, ptr, sizeof(str)); template.period_size = simple_strtoul(str, NULL, 10); for (idx1 = 31; idx1 >= 0; idx1--) if (template.period_size & (1 << idx1)) break; for (idx1--; idx1 >= 0; idx1--) template.period_size &= ~(1 << idx1); do { ptr = snd_info_get_str(str, ptr, sizeof(str)); if (!strcmp(str, "disable")) { template.disable = 1; } else if (!strcmp(str, "direct")) { template.direct = 1; } else if (!strcmp(str, "block")) { template.block = 1; } else if (!strcmp(str, "non-block")) { template.nonblock = 1; } else if (!strcmp(str, "partial-frag")) { template.partialfrag = 1; } else if (!strcmp(str, "no-silence")) { template.nosilence = 1; } else if (!strcmp(str, "buggy-ptr")) { template.buggyptr = 1; } } while (*str); if (setup == NULL) { setup = kmalloc(sizeof(*setup), GFP_KERNEL); if (! setup) { buffer->error = -ENOMEM; mutex_unlock(&pstr->oss.setup_mutex); return; } if (pstr->oss.setup_list == NULL) pstr->oss.setup_list = setup; else { for (setup1 = pstr->oss.setup_list; setup1->next; setup1 = setup1->next); setup1->next = setup; } template.task_name = kstrdup(task_name, GFP_KERNEL); if (! 
template.task_name) { kfree(setup); buffer->error = -ENOMEM; mutex_unlock(&pstr->oss.setup_mutex); return; } } *setup = template; mutex_unlock(&pstr->oss.setup_mutex); } } static void snd_pcm_oss_proc_init(struct snd_pcm *pcm) { int stream; for (stream = 0; stream < 2; ++stream) { struct snd_info_entry *entry; struct snd_pcm_str *pstr = &pcm->streams[stream]; if (pstr->substream_count == 0) continue; entry = snd_info_create_card_entry(pcm->card, "oss", pstr->proc_root); if (entry) { entry->content = SNDRV_INFO_CONTENT_TEXT; entry->mode = S_IFREG | 0644; entry->c.text.read = snd_pcm_oss_proc_read; entry->c.text.write = snd_pcm_oss_proc_write; entry->private_data = pstr; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } pstr->oss.proc_entry = entry; } } static void snd_pcm_oss_proc_done(struct snd_pcm *pcm) { int stream; for (stream = 0; stream < 2; ++stream) { struct snd_pcm_str *pstr = &pcm->streams[stream]; snd_info_free_entry(pstr->oss.proc_entry); pstr->oss.proc_entry = NULL; snd_pcm_oss_proc_free_setup_list(pstr); } } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline void snd_pcm_oss_proc_init(struct snd_pcm *pcm) { } static inline void snd_pcm_oss_proc_done(struct snd_pcm *pcm) { } #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* * ENTRY functions */ static const struct file_operations snd_pcm_oss_f_reg = { .owner = THIS_MODULE, .read = snd_pcm_oss_read, .write = snd_pcm_oss_write, .open = snd_pcm_oss_open, .release = snd_pcm_oss_release, .llseek = no_llseek, .poll = snd_pcm_oss_poll, .unlocked_ioctl = snd_pcm_oss_ioctl, .compat_ioctl = snd_pcm_oss_ioctl_compat, .mmap = snd_pcm_oss_mmap, }; static void register_oss_dsp(struct snd_pcm *pcm, int index) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, index, &snd_pcm_oss_f_reg, pcm) < 0) { pcm_err(pcm, "unable to register OSS PCM device %i:%i\n", pcm->card->number, pcm->device); } } static int snd_pcm_oss_register_minor(struct snd_pcm *pcm) { pcm->oss.reg = 0; if (dsp_map[pcm->card->number] == (int)pcm->device) { char name[128]; int duplex; register_oss_dsp(pcm, 0); duplex = (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream_count > 0 && pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream_count && !(pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX)); sprintf(name, "%s%s", pcm->name, duplex ? 
" (DUPLEX)" : ""); #ifdef SNDRV_OSS_INFO_DEV_AUDIO snd_oss_info_register(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number, name); #endif pcm->oss.reg++; pcm->oss.reg_mask |= 1; } if (adsp_map[pcm->card->number] == (int)pcm->device) { register_oss_dsp(pcm, 1); pcm->oss.reg++; pcm->oss.reg_mask |= 2; } if (pcm->oss.reg) snd_pcm_oss_proc_init(pcm); return 0; } static int snd_pcm_oss_disconnect_minor(struct snd_pcm *pcm) { if (pcm->oss.reg) { if (pcm->oss.reg_mask & 1) { pcm->oss.reg_mask &= ~1; snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, 0); } if (pcm->oss.reg_mask & 2) { pcm->oss.reg_mask &= ~2; snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, 1); } if (dsp_map[pcm->card->number] == (int)pcm->device) { #ifdef SNDRV_OSS_INFO_DEV_AUDIO snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number); #endif } pcm->oss.reg = 0; } return 0; } static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm) { snd_pcm_oss_disconnect_minor(pcm); snd_pcm_oss_proc_done(pcm); return 0; } static struct snd_pcm_notify snd_pcm_oss_notify = { .n_register = snd_pcm_oss_register_minor, .n_disconnect = snd_pcm_oss_disconnect_minor, .n_unregister = snd_pcm_oss_unregister_minor, }; static int __init alsa_pcm_oss_init(void) { int i; int err; /* check device map table */ for (i = 0; i < SNDRV_CARDS; i++) { if (dsp_map[i] < 0 || dsp_map[i] >= SNDRV_PCM_DEVICES) { pr_err("ALSA: pcm_oss: invalid dsp_map[%d] = %d\n", i, dsp_map[i]); dsp_map[i] = 0; } if (adsp_map[i] < 0 || adsp_map[i] >= SNDRV_PCM_DEVICES) { pr_err("ALSA: pcm_oss: invalid adsp_map[%d] = %d\n", i, adsp_map[i]); adsp_map[i] = 1; } } err = snd_pcm_notify(&snd_pcm_oss_notify, 0); if (err < 0) return err; return 0; } static void __exit alsa_pcm_oss_exit(void) { snd_pcm_notify(&snd_pcm_oss_notify, 1); } module_init(alsa_pcm_oss_init) module_exit(alsa_pcm_oss_exit) |
/* SPDX-License-Identifier: GPL-2.0-only */ /* * net busy poll support * Copyright(c) 2013 Intel Corporation. * * Author: Eliezer Tamir * * Contact Information: * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> */ #ifndef _LINUX_NET_BUSY_POLL_H #define _LINUX_NET_BUSY_POLL_H #include <linux/netdevice.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> #include <net/ip.h> #include <net/xdp.h> /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) #define BUSY_POLL_BUDGET 8 #ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; static inline bool net_busy_loop_on(void) { return READ_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(const struct sock *sk) { return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current); } bool sk_busy_loop_end(void *p, unsigned long start_time); void napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { return 0; } static inline bool sk_can_busy_loop(struct sock *sk) { return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long busy_loop_current_time(void) { #ifdef CONFIG_NET_RX_BUSY_POLL return (unsigned long)(local_clock() >> 10); #else return 0; #endif } /* in poll/select we use the global sysctl_net_ll_poll value */ static inline bool busy_loop_timeout(unsigned long start_time) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } #endif return true; } static inline bool sk_busy_loop_timeout(struct sock *sk, unsigned long start_time) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } #endif return true; } static inline void sk_busy_loop(struct sock *sk, int nonblock) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int napi_id = READ_ONCE(sk->sk_napi_id); if (napi_id >= MIN_NAPI_ID) napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk, READ_ONCE(sk->sk_prefer_busy_poll), READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET); #endif } /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) { #ifdef CONFIG_NET_RX_BUSY_POLL /* If the skb was already marked with a valid NAPI ID, avoid overwriting * it.
*/ if (skb->napi_id < MIN_NAPI_ID) skb->napi_id = napi->napi_id; #endif } /* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_update(sk, skb); } /* Variant of sk_mark_napi_id() for passive flow setup, * as sk->sk_napi_id and sk->sk_rx_queue_mapping content * needs to be set. */ static inline void sk_mark_napi_id_set(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) { #ifdef CONFIG_NET_RX_BUSY_POLL if (!READ_ONCE(sk->sk_napi_id)) WRITE_ONCE(sk->sk_napi_id, napi_id); #endif } /* variant used for unconnected sockets */ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL __sk_mark_napi_id_once(sk, skb->napi_id); #endif } static inline void sk_mark_napi_id_once_xdp(struct sock *sk, const struct xdp_buff *xdp) { #ifdef CONFIG_NET_RX_BUSY_POLL __sk_mark_napi_id_once(sk, xdp->rxq->napi_id); #endif } #endif /* _LINUX_NET_BUSY_POLL_H */
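As a usage note (a user-space sketch, not part of the header above): the busy-poll time consulted by sk_can_busy_loop() and sk_busy_loop_timeout() is set per socket with the SO_BUSY_POLL option, whose microsecond value is stored in sk->sk_ll_usec; busy polling then only takes effect once the driver has tagged the socket with a NAPI ID via the sk_mark_napi_id() helpers.

/* Minimal sketch: opt a socket into busy polling so a blocking receive
 * spins in napi_busy_loop() for up to 'usec' microseconds before
 * sleeping. Raising the value beyond the current one may require
 * CAP_NET_ADMIN.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

#ifndef SO_BUSY_POLL
#define SO_BUSY_POLL 46		/* include/uapi/asm-generic/socket.h */
#endif

static int enable_busy_poll(int fd, int usec)
{
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usec, sizeof(usec)) < 0) {
		perror("setsockopt(SO_BUSY_POLL)");
		return -1;
	}
	return 0;
}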
// SPDX-License-Identifier: GPL-2.0-only /* Helper handling for netfilter. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/random.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_log.h> #include <net/ip.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hash); unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel.
*/ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) { return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (strcmp(h->name, name)) continue; if (h->tuple.src.l3num != NFPROTO_UNSPEC && h->tuple.src.l3num != l3num) continue; if (h->tuple.dst.protonum == protonum) return h; } } return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conntrack_helper * nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { rcu_read_unlock(); if (request_module("nfct-helper-%s", name) == 0) { rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); } else { return h; } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { module_put(h->me); h = NULL; } rcu_read_unlock(); return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); static struct nf_conntrack_nat_helper * nf_conntrack_nat_helper_find(const char *mod_name) { struct nf_conntrack_nat_helper *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { if (!strcmp(cur->mod_name, mod_name)) { found = true; break; } } return found ? cur : NULL; } int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct nf_conntrack_nat_helper *nat; char mod_name[NF_CT_HELPER_NAME_LEN]; int ret = 0; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); if (!h) { rcu_read_unlock(); return -ENOENT; } nat = nf_conntrack_nat_helper_find(h->nat_mod_name); if (!nat) { snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); rcu_read_unlock(); request_module("%s", mod_name); rcu_read_lock(); nat = nf_conntrack_nat_helper_find(mod_name); if (!nat) { rcu_read_unlock(); return -ENOENT; } } if (!try_module_get(nat->module)) ret = -ENOENT; rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); void nf_nat_helper_put(struct nf_conntrack_helper *helper) { struct nf_conntrack_nat_helper *nat; nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); if (WARN_ON_ONCE(!nat)) return; module_put(nat->module); } EXPORT_SYMBOL_GPL(nf_nat_helper_put); struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else pr_debug("failed to add helper extension area"); return help; } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; /* We already got a helper explicitly attached (e.g. 
nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; if (WARN_ON_ONCE(!tmpl)) return 0; help = nfct_help(tmpl); if (help != NULL) { helper = rcu_dereference(help->helper); set_bit(IPS_HELPER_BIT, &ct->status); } help = nfct_help(ct); if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, flags); if (help == NULL) return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); return 0; } } rcu_assign_pointer(help->helper, helper); return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ static int unhelp(struct nf_conn *ct, void *me) { struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } /* We are not intended to delete this conntrack. */ return 0; } void nf_ct_helper_destroy(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; if (help) { rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && helper->destroy) helper->destroy(ct); rcu_read_unlock(); } } static LIST_HEAD(nf_ct_helper_expectfn_list); void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_del_rcu(&n->head); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); __printf(3, 4) void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) 
{ const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; /* Called from the helper function, this call never fails */ help = nfct_help(ct); /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); struct nf_conntrack_helper *cur; int ret = 0, i; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); if (!nf_ct_helper_hash) return -ENOENT; if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; mutex_lock(&nf_ct_helper_mutex); for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) { if (!strcmp(cur->name, me->name) && (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { ret = -EEXIST; goto out; } } } /* avoid unpredictable behaviour for auto_assign_helper */ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) { hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } } } refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: mutex_unlock(&nf_ct_helper_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; if (exp->helper == me) return true; this = rcu_dereference_protected(help->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; mutex_unlock(&nf_ct_helper_mutex); /* Make sure nothing is still using the helper unless it's a * connection in the hash.
*/ synchronize_rcu(); nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct), struct module *module) { helper->tuple.src.l3num = l3num; helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); else snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); } EXPORT_SYMBOL_GPL(nf_ct_helper_init); int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = nf_conntrack_helper_register(&helper[i]); if (err < 0) goto err; } return err; err: if (i > 0) nf_conntrack_helpers_unregister(helper, i); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, unsigned int n) { while (n-- > 0) nf_conntrack_helper_unregister(&helper[n]); } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_add_rcu(&nat->list, &nf_ct_nat_helpers); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_register); void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_del_rcu(&nat->list); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); int nf_conntrack_helper_init(void) { nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; } void nf_conntrack_helper_fini(void) { kvfree(nf_ct_helper_hash); nf_ct_helper_hash = NULL; } |
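/*
 * Illustrative sketch, not part of the file above: roughly how a protocol
 * helper module would use the registration API defined here.  The helper
 * name "foo", the TCP port 2121 and the expect-policy numbers are made up
 * for the example; real helpers (FTP, SIP, ...) follow the same pattern
 * with their own values.
 */
#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>

static const struct nf_conntrack_expect_policy foo_exp_policy = {
	.max_expected	= 1,
	.timeout	= 5 * 60,
};

static struct nf_conntrack_helper foo_helpers[2] __read_mostly;

static int foo_help(struct sk_buff *skb, unsigned int protoff,
		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
	/* a real helper would parse the payload and set up expectations */
	return NF_ACCEPT;
}

static int __init foo_helper_init(void)
{
	/* one IPv4 and one IPv6 instance, both on TCP port 2121 */
	nf_ct_helper_init(&foo_helpers[0], AF_INET, IPPROTO_TCP, "foo",
			  2121, 2121, 0, &foo_exp_policy, 0,
			  foo_help, NULL, THIS_MODULE);
	nf_ct_helper_init(&foo_helpers[1], AF_INET6, IPPROTO_TCP, "foo",
			  2121, 2121, 0, &foo_exp_policy, 0,
			  foo_help, NULL, THIS_MODULE);

	return nf_conntrack_helpers_register(foo_helpers,
					     ARRAY_SIZE(foo_helpers));
}

static void __exit foo_helper_exit(void)
{
	nf_conntrack_helpers_unregister(foo_helpers, ARRAY_SIZE(foo_helpers));
}

module_init(foo_helper_init);
module_exit(foo_helper_exit);
MODULE_LICENSE("GPL");
/* lets nf_conntrack_helper_try_module_get() autoload us via
 * request_module("nfct-helper-foo")
 */
MODULE_ALIAS_NFCT_HELPER("foo");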
// SPDX-License-Identifier: GPL-2.0-or-later /* Server address list management * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/ctype.h> #include <linux/dns_resolver.h> #include <linux/inet.h> #include <keys/rxrpc-type.h> #include "internal.h" #include "afs_fs.h" /* * Release an address list. */ void afs_put_addrlist(struct afs_addr_list *alist) { if (alist && refcount_dec_and_test(&alist->usage)) kfree_rcu(alist, rcu); } /* * Allocate an address list. */ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, unsigned short service, unsigned short port) { struct afs_addr_list *alist; unsigned int i; _enter("%u,%u,%u", nr, service, port); if (nr > AFS_MAX_ADDRESSES) nr = AFS_MAX_ADDRESSES; alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL); if (!alist) return NULL; refcount_set(&alist->usage, 1); alist->max_addrs = nr; for (i = 0; i < nr; i++) { struct sockaddr_rxrpc *srx = &alist->addrs[i]; srx->srx_family = AF_RXRPC; srx->srx_service = service; srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); srx->transport.sin6.sin6_family = AF_INET6; srx->transport.sin6.sin6_port = htons(port); } return alist; } /* * Parse a text string consisting of delimited addresses.
*/ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, const char *text, size_t len, char delim, unsigned short service, unsigned short port) { struct afs_vlserver_list *vllist; struct afs_addr_list *alist; const char *p, *end = text + len; const char *problem; unsigned int nr = 0; int ret = -ENOMEM; _enter("%*.*s,%c", (int)len, (int)len, text, delim); if (!len) { _leave(" = -EDESTADDRREQ [empty]"); return ERR_PTR(-EDESTADDRREQ); } if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) delim = ','; /* Count the addresses */ p = text; do { if (!*p) { problem = "nul"; goto inval; } if (*p == delim) continue; nr++; if (*p == '[') { p++; if (p == end) { problem = "brace1"; goto inval; } p = memchr(p, ']', end - p); if (!p) { problem = "brace2"; goto inval; } p++; if (p >= end) break; } p = memchr(p, delim, end - p); if (!p) break; p++; } while (p < end); _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); vllist = afs_alloc_vlserver_list(1); if (!vllist) return ERR_PTR(-ENOMEM); vllist->nr_servers = 1; vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT); if (!vllist->servers[0].server) goto error_vl; alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); if (!alist) goto error; /* Extract the addresses */ p = text; do { const char *q, *stop; unsigned int xport = port; __be32 x[4]; int family; if (*p == delim) { p++; continue; } if (*p == '[') { p++; q = memchr(p, ']', end - p); } else { for (q = p; q < end; q++) if (*q == '+' || *q == delim) break; } if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) { family = AF_INET; } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) { family = AF_INET6; } else { problem = "family"; goto bad_address; } p = q; if (stop != p) { problem = "nostop"; goto bad_address; } if (q < end && *q == ']') p++; if (p < end) { if (*p == '+') { /* Port number specification "+1234" */ xport = 0; p++; if (p >= end || !isdigit(*p)) { problem = "port"; goto bad_address; } do { xport *= 10; xport += *p - '0'; if (xport > 65535) { problem = "pval"; goto bad_address; } p++; } while (p < end && isdigit(*p)); } else if (*p == delim) { p++; } else { problem = "weird"; goto bad_address; } } if (family == AF_INET) afs_merge_fs_addr4(alist, x[0], xport); else afs_merge_fs_addr6(alist, x, xport); } while (p < end); rcu_assign_pointer(vllist->servers[0].server->addresses, alist); _leave(" = [nr %u]", alist->nr_addrs); return vllist; inval: _leave(" = -EINVAL [%s %zu %*.*s]", problem, p - text, (int)len, (int)len, text); return ERR_PTR(-EINVAL); bad_address: _leave(" = -EINVAL [%s %zu %*.*s]", problem, p - text, (int)len, (int)len, text); ret = -EINVAL; error: afs_put_addrlist(alist); error_vl: afs_put_vlserverlist(net, vllist); return ERR_PTR(ret); } /* * Compare old and new address lists to see if there's been any change. * - How to do this in better than O(Nlog(N)) time? * - We don't really want to sort the address list, but would rather take the * list as we got it so as not to undo record rotation by the DNS server. */ #if 0 static int afs_cmp_addr_list(const struct afs_addr_list *a1, const struct afs_addr_list *a2) { } #endif /* * Perform a DNS query for VL servers and build a up an address list. 
*/ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) { struct afs_vlserver_list *vllist; char *result = NULL; int ret; _enter("%s", cell->name); ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, "srv=1", &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); } if (*_expiry == 0) *_expiry = ktime_get_real_seconds() + 60; if (ret > 1 && result[0] == 0) vllist = afs_extract_vlserver_list(cell, result, ret); else vllist = afs_parse_text_addrs(cell->net, result, ret, ',', VL_SERVICE, AFS_VL_PORT); kfree(result); if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM)) pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist)); return vllist; } /* * Merge an IPv4 entry into a fileserver address list. */ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) { struct sockaddr_rxrpc *srx; u32 addr = ntohl(xdr); int i; if (alist->nr_addrs >= alist->max_addrs) return; for (i = 0; i < alist->nr_ipv4; i++) { struct sockaddr_in *a = &alist->addrs[i].transport.sin; u32 a_addr = ntohl(a->sin_addr.s_addr); u16 a_port = ntohs(a->sin_port); if (addr == a_addr && port == a_port) return; if (addr == a_addr && port < a_port) break; if (addr < a_addr) break; } if (i < alist->nr_addrs) memmove(alist->addrs + i + 1, alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; srx->srx_family = AF_RXRPC; srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin); srx->transport.sin.sin_family = AF_INET; srx->transport.sin.sin_port = htons(port); srx->transport.sin.sin_addr.s_addr = xdr; alist->nr_ipv4++; alist->nr_addrs++; } /* * Merge an IPv6 entry into a fileserver address list. */ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) { struct sockaddr_rxrpc *srx; int i, diff; if (alist->nr_addrs >= alist->max_addrs) return; for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6; u16 a_port = ntohs(a->sin6_port); diff = memcmp(xdr, &a->sin6_addr, 16); if (diff == 0 && port == a_port) return; if (diff == 0 && port < a_port) break; if (diff < 0) break; } if (i < alist->nr_addrs) memmove(alist->addrs + i + 1, alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; srx->srx_family = AF_RXRPC; srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); srx->transport.sin6.sin6_family = AF_INET6; srx->transport.sin6.sin6_port = htons(port); memcpy(&srx->transport.sin6.sin6_addr, xdr, 16); alist->nr_addrs++; } /* * Get an address to try. */ bool afs_iterate_addresses(struct afs_addr_cursor *ac) { unsigned long set, failed; int index; if (!ac->alist) return false; set = ac->alist->responded; failed = ac->alist->failed; _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index); ac->nr_iterations++; set &= ~(failed | ac->tried); if (!set) return false; index = READ_ONCE(ac->alist->preferred); if (test_bit(index, &set)) goto selected; index = __ffs(set); selected: ac->index = index; set_bit(index, &ac->tried); ac->responded = false; return true; } /* * Release an address list cursor. */ int afs_end_cursor(struct afs_addr_cursor *ac) { struct afs_addr_list *alist; alist = ac->alist; if (alist) { if (ac->responded && ac->index != alist->preferred && test_bit(ac->alist->preferred, &ac->tried)) WRITE_ONCE(alist->preferred, ac->index); afs_put_addrlist(alist); ac->alist = NULL; } return ac->error; } |
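/*
 * Illustrative sketch, not part of the file above: the rough shape of a
 * caller driving afs_iterate_addresses()/afs_end_cursor().  The cursor
 * fields used here (alist, index, tried, responded, error) are the ones
 * the helpers above rely on; afs_do_one_call() is a purely hypothetical
 * stand-in for issuing an RPC to the currently selected address.  The
 * cursor takes over the caller's reference on the list, which
 * afs_end_cursor() drops via afs_put_addrlist().
 */
static int afs_try_each_address(struct afs_addr_list *alist)
{
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.error	= -EDESTADDRREQ,
	};

	while (afs_iterate_addresses(&ac)) {
		struct sockaddr_rxrpc *srx = &ac.alist->addrs[ac.index];

		ac.error = afs_do_one_call(srx);	/* hypothetical RPC */
		if (ac.error == 0) {
			/* lets afs_end_cursor() update the preferred slot */
			ac.responded = true;
			break;
		}
	}

	/* returns the last recorded error and releases the list */
	return afs_end_cursor(&ac);
}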
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* raw.c - Raw sockets for protocol family CAN * * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE.
* */ #include <linux/module.h> #include <linux/init.h> #include <linux/uio.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/dev.h> /* for can_is_canxl_dev_mtu() */ #include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> #include <net/net_namespace.h> MODULE_DESCRIPTION("PF_CAN raw protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>"); MODULE_ALIAS("can-proto-1"); #define RAW_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) #define MASK_ALL 0 /* A raw socket has a list of can_filters attached to it, each receiving * the CAN frames matching that filter. If the filter list is empty, * no CAN frames will be received by the socket. The default after * opening the socket, is to have one filter which receives all frames. * The filter list is allocated dynamically with the exception of the * list containing only one item. This common case is optimized by * storing the single filter in dfilter, to avoid using dynamic memory. */ struct uniqframe { int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; struct raw_sock { struct sock sk; int bound; int ifindex; struct net_device *dev; netdevice_tracker dev_tracker; struct list_head notifier; int loopback; int recv_own_msgs; int fd_frames; int xl_frames; int join_filters; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ struct can_filter *filter; /* pointer to filter(s) */ can_err_mask_t err_mask; struct uniqframe __percpu *uniq; }; static LIST_HEAD(raw_notifier_list); static DEFINE_SPINLOCK(raw_notifier_lock); static struct raw_sock *raw_busy_notifier; /* Return pointer to store the extra msg flags for raw_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. */ static inline unsigned int *raw_flags(struct sk_buff *skb) { sock_skb_cb_check_size(sizeof(struct sockaddr_can) + sizeof(unsigned int)); /* return pointer after struct sockaddr_can */ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); } static inline struct raw_sock *raw_sk(const struct sock *sk) { return (struct raw_sock *)sk; } static void raw_rcv(struct sk_buff *oskb, void *data) { struct sock *sk = (struct sock *)data; struct raw_sock *ro = raw_sk(sk); struct sockaddr_can *addr; struct sk_buff *skb; unsigned int *pflags; /* check the received tx sock reference */ if (!ro->recv_own_msgs && oskb->sk == sk) return; /* make sure to not pass oversized frames to the socket */ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) || (!ro->xl_frames && can_is_canxl_skb(oskb))) return; /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (!ro->join_filters) return; this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) return; } else { this_cpu_ptr(ro->uniq)->skb = oskb; this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? 
*/ if (ro->join_filters && ro->count > 1) return; } /* clone the given skb to be able to enqueue it into the rcv queue */ skb = skb_clone(oskb, GFP_ATOMIC); if (!skb) return; /* Put the datagram to the queue so that raw_recvmsg() can get * it from there. We need to pass the interface index to * raw_recvmsg(). We pass a whole struct sockaddr_can in * skb->cb containing the interface index. */ sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; /* add CAN specific message flags for raw_recvmsg() */ pflags = raw_flags(skb); *pflags = 0; if (oskb->sk) *pflags |= MSG_DONTROUTE; if (oskb->sk == sk) *pflags |= MSG_CONFIRM; if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); } static int raw_enable_filters(struct net *net, struct net_device *dev, struct sock *sk, struct can_filter *filter, int count) { int err = 0; int i; for (i = 0; i < count; i++) { err = can_rx_register(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); break; } } return err; } static int raw_enable_errfilter(struct net *net, struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { int err = 0; if (err_mask) err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk, "raw", sk); return err; } static void raw_disable_filters(struct net *net, struct net_device *dev, struct sock *sk, struct can_filter *filter, int count) { int i; for (i = 0; i < count; i++) can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); } static inline void raw_disable_errfilter(struct net *net, struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { if (err_mask) can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk); } static inline void raw_disable_allfilters(struct net *net, struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); raw_disable_filters(net, dev, sk, ro->filter, ro->count); raw_disable_errfilter(net, dev, sk, ro->err_mask); } static int raw_enable_allfilters(struct net *net, struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); int err; err = raw_enable_filters(net, dev, sk, ro->filter, ro->count); if (!err) { err = raw_enable_errfilter(net, dev, sk, ro->err_mask); if (err) raw_disable_filters(net, dev, sk, ro->filter, ro->count); } return err; } static void raw_notify(struct raw_sock *ro, unsigned long msg, struct net_device *dev) { struct sock *sk = &ro->sk; if (!net_eq(dev_net(dev), sock_net(sk))) return; if (ro->dev != dev) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) { raw_disable_allfilters(dev_net(dev), dev, sk); netdev_put(dev, &ro->dev_tracker); } if (ro->count > 1) kfree(ro->filter); ro->ifindex = 0; ro->bound = 0; ro->dev = NULL; ro->count = 0; release_sock(sk); sk->sk_err = ENODEV; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); break; case NETDEV_DOWN: sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); break; } } static int raw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) return NOTIFY_DONE; if 
(unlikely(raw_busy_notifier)) /* Check for reentrant bug. */ return NOTIFY_DONE; spin_lock(&raw_notifier_lock); list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) { spin_unlock(&raw_notifier_lock); raw_notify(raw_busy_notifier, msg, dev); spin_lock(&raw_notifier_lock); } raw_busy_notifier = NULL; spin_unlock(&raw_notifier_lock); return NOTIFY_DONE; } static int raw_init(struct sock *sk) { struct raw_sock *ro = raw_sk(sk); ro->bound = 0; ro->ifindex = 0; ro->dev = NULL; /* set default filter to single entry dfilter */ ro->dfilter.can_id = 0; ro->dfilter.can_mask = MASK_ALL; ro->filter = &ro->dfilter; ro->count = 1; /* set default loopback behaviour */ ro->loopback = 1; ro->recv_own_msgs = 0; ro->fd_frames = 0; ro->xl_frames = 0; ro->join_filters = 0; /* alloc_percpu provides zero'ed memory */ ro->uniq = alloc_percpu(struct uniqframe); if (unlikely(!ro->uniq)) return -ENOMEM; /* set notifier */ spin_lock(&raw_notifier_lock); list_add_tail(&ro->notifier, &raw_notifier_list); spin_unlock(&raw_notifier_lock); return 0; } static int raw_release(struct socket *sock) { struct sock *sk = sock->sk; struct raw_sock *ro; if (!sk) return 0; ro = raw_sk(sk); spin_lock(&raw_notifier_lock); while (raw_busy_notifier == ro) { spin_unlock(&raw_notifier_lock); schedule_timeout_uninterruptible(1); spin_lock(&raw_notifier_lock); } list_del(&ro->notifier); spin_unlock(&raw_notifier_lock); rtnl_lock(); lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) { if (ro->dev) { raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(sock_net(sk), NULL, sk); } } if (ro->count > 1) kfree(ro->filter); ro->ifindex = 0; ro->bound = 0; ro->dev = NULL; ro->count = 0; free_percpu(ro->uniq); sock_orphan(sk); sock->sk = NULL; release_sock(sk); rtnl_unlock(); sock_put(sk); return 0; } static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct net_device *dev = NULL; int ifindex; int err = 0; int notify_enetdown = 0; if (len < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; rtnl_lock(); lock_sock(sk); if (ro->bound && addr->can_ifindex == ro->ifindex) goto out; if (addr->can_ifindex) { dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; } if (dev->type != ARPHRD_CAN) { err = -ENODEV; goto out_put_dev; } if (!(dev->flags & IFF_UP)) notify_enetdown = 1; ifindex = dev->ifindex; /* filters set by default/setsockopt */ err = raw_enable_allfilters(sock_net(sk), dev, sk); if (err) goto out_put_dev; } else { ifindex = 0; /* filters set by default/setsockopt */ err = raw_enable_allfilters(sock_net(sk), NULL, sk); } if (!err) { if (ro->bound) { /* unregister old filters */ if (ro->dev) { raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); /* drop reference to old ro->dev */ netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(sock_net(sk), NULL, sk); } } ro->ifindex = ifindex; ro->bound = 1; /* bind() ok -> hold a reference for new ro->dev */ ro->dev = dev; if (ro->dev) netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL); } out_put_dev: /* remove potential reference from dev_get_by_index() */ dev_put(dev); out: release_sock(sk); rtnl_unlock(); if (notify_enetdown) { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } return err; } static int raw_getname(struct socket *sock, 
struct sockaddr *uaddr, int peer) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); if (peer) return -EOPNOTSUPP; memset(addr, 0, RAW_MIN_NAMELEN); addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; return RAW_MIN_NAMELEN; } static int raw_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct can_filter *filter = NULL; /* dyn. alloc'ed filters */ struct can_filter sfilter; /* single filter */ struct net_device *dev = NULL; can_err_mask_t err_mask = 0; int fd_frames; int count = 0; int err = 0; if (level != SOL_CAN_RAW) return -EINVAL; switch (optname) { case CAN_RAW_FILTER: if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; if (optlen > CAN_RAW_FILTER_MAX * sizeof(struct can_filter)) return -EINVAL; count = optlen / sizeof(struct can_filter); if (count > 1) { /* filter does not fit into dfilter => alloc space */ filter = memdup_sockptr(optval, optlen); if (IS_ERR(filter)) return PTR_ERR(filter); } else if (count == 1) { if (copy_from_sockptr(&sfilter, optval, sizeof(sfilter))) return -EFAULT; } rtnl_lock(); lock_sock(sk); dev = ro->dev; if (ro->bound && dev) { if (dev->reg_state != NETREG_REGISTERED) { if (count > 1) kfree(filter); err = -ENODEV; goto out_fil; } } if (ro->bound) { /* (try to) register the new filters */ if (count == 1) err = raw_enable_filters(sock_net(sk), dev, sk, &sfilter, 1); else err = raw_enable_filters(sock_net(sk), dev, sk, filter, count); if (err) { if (count > 1) kfree(filter); goto out_fil; } /* remove old filter registrations */ raw_disable_filters(sock_net(sk), dev, sk, ro->filter, ro->count); } /* remove old filter space */ if (ro->count > 1) kfree(ro->filter); /* link new filters to the socket */ if (count == 1) { /* copy filter data for single filter */ ro->dfilter = sfilter; filter = &ro->dfilter; } ro->filter = filter; ro->count = count; out_fil: release_sock(sk); rtnl_unlock(); break; case CAN_RAW_ERR_FILTER: if (optlen != sizeof(err_mask)) return -EINVAL; if (copy_from_sockptr(&err_mask, optval, optlen)) return -EFAULT; err_mask &= CAN_ERR_MASK; rtnl_lock(); lock_sock(sk); dev = ro->dev; if (ro->bound && dev) { if (dev->reg_state != NETREG_REGISTERED) { err = -ENODEV; goto out_err; } } /* remove current error mask */ if (ro->bound) { /* (try to) register the new err_mask */ err = raw_enable_errfilter(sock_net(sk), dev, sk, err_mask); if (err) goto out_err; /* remove old err_mask registration */ raw_disable_errfilter(sock_net(sk), dev, sk, ro->err_mask); } /* link new err_mask to the socket */ ro->err_mask = err_mask; out_err: release_sock(sk); rtnl_unlock(); break; case CAN_RAW_LOOPBACK: if (optlen != sizeof(ro->loopback)) return -EINVAL; if (copy_from_sockptr(&ro->loopback, optval, optlen)) return -EFAULT; break; case CAN_RAW_RECV_OWN_MSGS: if (optlen != sizeof(ro->recv_own_msgs)) return -EINVAL; if (copy_from_sockptr(&ro->recv_own_msgs, optval, optlen)) return -EFAULT; break; case CAN_RAW_FD_FRAMES: if (optlen != sizeof(fd_frames)) return -EINVAL; if (copy_from_sockptr(&fd_frames, optval, optlen)) return -EFAULT; /* Enabling CAN XL includes CAN FD */ if (ro->xl_frames && !fd_frames) return -EINVAL; ro->fd_frames = fd_frames; break; case CAN_RAW_XL_FRAMES: if (optlen != sizeof(ro->xl_frames)) return -EINVAL; if (copy_from_sockptr(&ro->xl_frames, optval, optlen)) return -EFAULT; /* Enabling CAN XL includes CAN FD */ if (ro->xl_frames) ro->fd_frames = 
ro->xl_frames; break; case CAN_RAW_JOIN_FILTERS: if (optlen != sizeof(ro->join_filters)) return -EINVAL; if (copy_from_sockptr(&ro->join_filters, optval, optlen)) return -EFAULT; break; default: return -ENOPROTOOPT; } return err; } static int raw_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); int len; void *val; int err = 0; if (level != SOL_CAN_RAW) return -EINVAL; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case CAN_RAW_FILTER: lock_sock(sk); if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); /* user space buffer to small for filter list? */ if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; if (put_user(fsize, optlen)) err = -EFAULT; } else { if (len > fsize) len = fsize; if (copy_to_user(optval, ro->filter, len)) err = -EFAULT; } } else { len = 0; } release_sock(sk); if (!err) err = put_user(len, optlen); return err; case CAN_RAW_ERR_FILTER: if (len > sizeof(can_err_mask_t)) len = sizeof(can_err_mask_t); val = &ro->err_mask; break; case CAN_RAW_LOOPBACK: if (len > sizeof(int)) len = sizeof(int); val = &ro->loopback; break; case CAN_RAW_RECV_OWN_MSGS: if (len > sizeof(int)) len = sizeof(int); val = &ro->recv_own_msgs; break; case CAN_RAW_FD_FRAMES: if (len > sizeof(int)) len = sizeof(int); val = &ro->fd_frames; break; case CAN_RAW_XL_FRAMES: if (len > sizeof(int)) len = sizeof(int); val = &ro->xl_frames; break; case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) len = sizeof(int); val = &ro->join_filters; break; default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, val, len)) return -EFAULT; return 0; } static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu) { /* Classical CAN -> no checks for flags and device capabilities */ if (can_is_can_skb(skb)) return false; /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */ if (ro->fd_frames && can_is_canfd_skb(skb) && (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu))) return false; /* CAN XL -> needs to be enabled and a CAN XL device */ if (ro->xl_frames && can_is_canxl_skb(skb) && can_is_canxl_dev_mtu(mtu)) return false; return true; } static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct sockcm_cookie sockc; struct sk_buff *skb; struct net_device *dev; int ifindex; int err = -EINVAL; /* check for valid CAN frame sizes */ if (size < CANXL_HDR_SIZE + CANXL_MIN_DLEN || size > CANXL_MTU) return -EINVAL; if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; ifindex = addr->can_ifindex; } else { ifindex = ro->ifindex; } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; /* fill the skb before testing for valid CAN frames */ err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) goto free_skb; err = -EINVAL; if (raw_bad_txframe(ro, skb, dev->mtu)) goto free_skb; sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto free_skb; } 
skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); err = can_send(skb, ro->loopback); dev_put(dev); if (err) goto send_failed; return size; free_skb: kfree_skb(skb); put_dev: dev_put(dev); send_failed: return err; } static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int err = 0; if (flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE); skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; err = memcpy_to_msg(msg, skb->data, size); if (err < 0) { skb_free_datagram(sk, skb); return err; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(RAW_MIN_NAMELEN); msg->msg_namelen = RAW_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } /* assign the flags that have been recorded in raw_rcv() */ msg->msg_flags |= *(raw_flags(skb)); skb_free_datagram(sk, skb); return size; } static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, .bind = raw_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, .ioctl = raw_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, .getsockopt = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, }; static struct proto raw_proto __read_mostly = { .name = "CAN_RAW", .owner = THIS_MODULE, .obj_size = sizeof(struct raw_sock), .init = raw_init, }; static const struct can_proto raw_can_proto = { .type = SOCK_RAW, .protocol = CAN_RAW, .ops = &raw_ops, .prot = &raw_proto, }; static struct notifier_block canraw_notifier = { .notifier_call = raw_notifier }; static __init int raw_module_init(void) { int err; pr_info("can: raw protocol\n"); err = register_netdevice_notifier(&canraw_notifier); if (err) return err; err = can_proto_register(&raw_can_proto); if (err < 0) { pr_err("can: registration of raw protocol failed\n"); goto register_proto_failed; } return 0; register_proto_failed: unregister_netdevice_notifier(&canraw_notifier); return err; } static __exit void raw_module_exit(void) { can_proto_unregister(&raw_can_proto); unregister_netdevice_notifier(&canraw_notifier); } module_init(raw_module_init); module_exit(raw_module_exit); |
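/*
 * Illustrative userspace sketch, not part of the module above: opening a
 * CAN_RAW socket, installing a single filter via CAN_RAW_FILTER (handled
 * by raw_setsockopt() above) and reading one classical CAN frame.  The
 * interface name "can0" and the CAN ID 0x123 are example values, and
 * error handling is reduced to early returns.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>

int read_one_frame(void)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct can_filter rfilter = {
		.can_id   = 0x123,
		.can_mask = CAN_SFF_MASK,	/* match only ID 0x123 */
	};
	struct can_frame frame;
	struct ifreq ifr;
	int s;

	s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
	if (s < 0)
		return -1;

	/* ends up in raw_setsockopt(), case CAN_RAW_FILTER */
	if (setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER,
		       &rfilter, sizeof(rfilter)) < 0)
		goto err;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "can0", IFNAMSIZ - 1);
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0)
		goto err;

	addr.can_ifindex = ifr.ifr_ifindex;
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto err;

	/* blocks in raw_recvmsg() until a matching frame arrives */
	if (read(s, &frame, sizeof(frame)) < 0)
		goto err;

	printf("ID 0x%03X, len %d\n", frame.can_id, frame.len);
	close(s);
	return 0;
err:
	close(s);
	return -1;
}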
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * iSCSI transport class definitions
 *
 * Copyright (C) IBM Corporation, 2004
 * Copyright (C) Mike Christie, 2004 - 2005
 * Copyright (C) Dmitry Yusupov, 2004 - 2005
 * Copyright (C) Alex Aizman, 2004 - 2005
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/bsg-lib.h>
#include <linux/idr.h>
#include <net/tcp.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_transport.h>
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/iscsi_if.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_bsg_iscsi.h>

#define
ISCSI_TRANSPORT_VERSION "2.0-870" #define ISCSI_SEND_MAX_ALLOWED 10 #define CREATE_TRACE_POINTS #include <trace/events/iscsi.h> /* * Export tracepoint symbols to be used by other modules. */ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_conn); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_eh); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp); static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_session, "Turn on debugging for sessions in scsi_transport_iscsi " "module. Set to 1 to turn on, and zero to turn off. Default " "is off."); static int dbg_conn; module_param_named(debug_conn, dbg_conn, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_conn, "Turn on debugging for connections in scsi_transport_iscsi " "module. Set to 1 to turn on, and zero to turn off. Default " "is off."); #define ISCSI_DBG_TRANS_SESSION(_session, dbg_fmt, arg...) \ do { \ if (dbg_session) \ iscsi_cls_session_printk(KERN_INFO, _session, \ "%s: " dbg_fmt, \ __func__, ##arg); \ iscsi_dbg_trace(trace_iscsi_dbg_trans_session, \ &(_session)->dev, \ "%s " dbg_fmt, __func__, ##arg); \ } while (0); #define ISCSI_DBG_TRANS_CONN(_conn, dbg_fmt, arg...) \ do { \ if (dbg_conn) \ iscsi_cls_conn_printk(KERN_INFO, _conn, \ "%s: " dbg_fmt, \ __func__, ##arg); \ iscsi_dbg_trace(trace_iscsi_dbg_trans_conn, \ &(_conn)->dev, \ "%s " dbg_fmt, __func__, ##arg); \ } while (0); struct iscsi_internal { struct scsi_transport_template t; struct iscsi_transport *iscsi_transport; struct list_head list; struct device dev; struct transport_container conn_cont; struct transport_container session_cont; }; static DEFINE_IDR(iscsi_ep_idr); static DEFINE_MUTEX(iscsi_ep_idr_mutex); static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_conn_cleanup_workq; static DEFINE_IDA(iscsi_sess_ida); /* * list of registered transports and lock that must * be held while accessing list. The iscsi_transport_lock must * be acquired after the rx_queue_mutex. */ static LIST_HEAD(iscsi_transports); static DEFINE_SPINLOCK(iscsi_transport_lock); #define to_iscsi_internal(tmpl) \ container_of(tmpl, struct iscsi_internal, t) #define dev_to_iscsi_internal(_dev) \ container_of(_dev, struct iscsi_internal, dev) static void iscsi_transport_release(struct device *dev) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); kfree(priv); } /* * iscsi_transport_class represents the iscsi_transports that are * registered. 
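 * The class device embedded in struct iscsi_internal belongs to this class;
 * when its last reference is dropped, iscsi_transport_release() above runs
 * and frees the containing iscsi_internal.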
*/ static struct class iscsi_transport_class = { .name = "iscsi_transport", .dev_release = iscsi_transport_release, }; static ssize_t show_transport_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); if (!capable(CAP_SYS_ADMIN)) return -EACCES; return sysfs_emit(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport)); } static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL); #define show_transport_attr(name, format) \ static ssize_t \ show_transport_##name(struct device *dev, \ struct device_attribute *attr,char *buf) \ { \ struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \ return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\ } \ static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL); show_transport_attr(caps, "0x%x"); static struct attribute *iscsi_transport_attrs[] = { &dev_attr_handle.attr, &dev_attr_caps.attr, NULL, }; static struct attribute_group iscsi_transport_group = { .attrs = iscsi_transport_attrs, }; /* * iSCSI endpoint attrs */ #define iscsi_dev_to_endpoint(_dev) \ container_of(_dev, struct iscsi_endpoint, dev) #define ISCSI_ATTR(_prefix,_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name,_mode,_show,_store) static void iscsi_endpoint_release(struct device *dev) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, ep->id); mutex_unlock(&iscsi_ep_idr_mutex); kfree(ep); } static struct class iscsi_endpoint_class = { .name = "iscsi_endpoint", .dev_release = iscsi_endpoint_release, }; static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); return sysfs_emit(buf, "%d\n", ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); static struct attribute *iscsi_endpoint_attrs[] = { &dev_attr_ep_handle.attr, NULL, }; static struct attribute_group iscsi_endpoint_group = { .attrs = iscsi_endpoint_attrs, }; struct iscsi_endpoint * iscsi_create_endpoint(int dd_size) { struct iscsi_endpoint *ep; int err, id; ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL); if (!ep) return NULL; mutex_lock(&iscsi_ep_idr_mutex); /* * First endpoint id should be 1 to comply with user space * applications (iscsid). */ id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO); if (id < 0) { mutex_unlock(&iscsi_ep_idr_mutex); printk(KERN_ERR "Could not allocate endpoint ID. 
Error %d.\n", id); goto free_ep; } mutex_unlock(&iscsi_ep_idr_mutex); ep->id = id; ep->dev.class = &iscsi_endpoint_class; dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) goto unregister_dev; if (dd_size) ep->dd_data = &ep[1]; return ep; unregister_dev: device_unregister(&ep->dev); return NULL; put_dev: mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, id); mutex_unlock(&iscsi_ep_idr_mutex); put_device(&ep->dev); return NULL; free_ep: kfree(ep); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_endpoint); void iscsi_destroy_endpoint(struct iscsi_endpoint *ep) { sysfs_remove_group(&ep->dev.kobj, &iscsi_endpoint_group); device_unregister(&ep->dev); } EXPORT_SYMBOL_GPL(iscsi_destroy_endpoint); void iscsi_put_endpoint(struct iscsi_endpoint *ep) { put_device(&ep->dev); } EXPORT_SYMBOL_GPL(iscsi_put_endpoint); /** * iscsi_lookup_endpoint - get ep from handle * @handle: endpoint handle * * Caller must do a iscsi_put_endpoint. */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { struct iscsi_endpoint *ep; mutex_lock(&iscsi_ep_idr_mutex); ep = idr_find(&iscsi_ep_idr, handle); if (!ep) goto unlock; get_device(&ep->dev); unlock: mutex_unlock(&iscsi_ep_idr_mutex); return ep; } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); /* * Interface to display network param to sysfs */ static void iscsi_iface_release(struct device *dev) { struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct device *parent = iface->dev.parent; kfree(iface); put_device(parent); } static struct class iscsi_iface_class = { .name = "iscsi_iface", .dev_release = iscsi_iface_release, }; #define ISCSI_IFACE_ATTR(_prefix, _name, _mode, _show, _store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name, _mode, _show, _store) /* iface attrs show */ #define iscsi_iface_attr_show(type, name, param_type, param) \ static ssize_t \ show_##type##_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct iscsi_iface *iface = iscsi_dev_to_iface(dev); \ struct iscsi_transport *t = iface->transport; \ return t->get_iface_param(iface, param_type, param, buf); \ } \ #define iscsi_iface_net_attr(type, name, param) \ iscsi_iface_attr_show(type, name, ISCSI_NET_PARAM, param) \ static ISCSI_IFACE_ATTR(type, name, S_IRUGO, show_##type##_##name, NULL); #define iscsi_iface_attr(type, name, param) \ iscsi_iface_attr_show(type, name, ISCSI_IFACE_PARAM, param) \ static ISCSI_IFACE_ATTR(type, name, S_IRUGO, show_##type##_##name, NULL); /* generic read only ipv4 attribute */ iscsi_iface_net_attr(ipv4_iface, ipaddress, ISCSI_NET_PARAM_IPV4_ADDR); iscsi_iface_net_attr(ipv4_iface, gateway, ISCSI_NET_PARAM_IPV4_GW); iscsi_iface_net_attr(ipv4_iface, subnet, ISCSI_NET_PARAM_IPV4_SUBNET); iscsi_iface_net_attr(ipv4_iface, bootproto, ISCSI_NET_PARAM_IPV4_BOOTPROTO); iscsi_iface_net_attr(ipv4_iface, dhcp_dns_address_en, ISCSI_NET_PARAM_IPV4_DHCP_DNS_ADDR_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_slp_da_info_en, ISCSI_NET_PARAM_IPV4_DHCP_SLP_DA_EN); iscsi_iface_net_attr(ipv4_iface, tos_en, ISCSI_NET_PARAM_IPV4_TOS_EN); iscsi_iface_net_attr(ipv4_iface, tos, ISCSI_NET_PARAM_IPV4_TOS); iscsi_iface_net_attr(ipv4_iface, grat_arp_en, ISCSI_NET_PARAM_IPV4_GRAT_ARP_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_alt_client_id_en, ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_alt_client_id, ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID); iscsi_iface_net_attr(ipv4_iface, 
dhcp_req_vendor_id_en, ISCSI_NET_PARAM_IPV4_DHCP_REQ_VENDOR_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_use_vendor_id_en, ISCSI_NET_PARAM_IPV4_DHCP_USE_VENDOR_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_vendor_id, ISCSI_NET_PARAM_IPV4_DHCP_VENDOR_ID); iscsi_iface_net_attr(ipv4_iface, dhcp_learn_iqn_en, ISCSI_NET_PARAM_IPV4_DHCP_LEARN_IQN_EN); iscsi_iface_net_attr(ipv4_iface, fragment_disable, ISCSI_NET_PARAM_IPV4_FRAGMENT_DISABLE); iscsi_iface_net_attr(ipv4_iface, incoming_forwarding_en, ISCSI_NET_PARAM_IPV4_IN_FORWARD_EN); iscsi_iface_net_attr(ipv4_iface, ttl, ISCSI_NET_PARAM_IPV4_TTL); /* generic read only ipv6 attribute */ iscsi_iface_net_attr(ipv6_iface, ipaddress, ISCSI_NET_PARAM_IPV6_ADDR); iscsi_iface_net_attr(ipv6_iface, link_local_addr, ISCSI_NET_PARAM_IPV6_LINKLOCAL); iscsi_iface_net_attr(ipv6_iface, router_addr, ISCSI_NET_PARAM_IPV6_ROUTER); iscsi_iface_net_attr(ipv6_iface, ipaddr_autocfg, ISCSI_NET_PARAM_IPV6_ADDR_AUTOCFG); iscsi_iface_net_attr(ipv6_iface, link_local_autocfg, ISCSI_NET_PARAM_IPV6_LINKLOCAL_AUTOCFG); iscsi_iface_net_attr(ipv6_iface, link_local_state, ISCSI_NET_PARAM_IPV6_LINKLOCAL_STATE); iscsi_iface_net_attr(ipv6_iface, router_state, ISCSI_NET_PARAM_IPV6_ROUTER_STATE); iscsi_iface_net_attr(ipv6_iface, grat_neighbor_adv_en, ISCSI_NET_PARAM_IPV6_GRAT_NEIGHBOR_ADV_EN); iscsi_iface_net_attr(ipv6_iface, mld_en, ISCSI_NET_PARAM_IPV6_MLD_EN); iscsi_iface_net_attr(ipv6_iface, flow_label, ISCSI_NET_PARAM_IPV6_FLOW_LABEL); iscsi_iface_net_attr(ipv6_iface, traffic_class, ISCSI_NET_PARAM_IPV6_TRAFFIC_CLASS); iscsi_iface_net_attr(ipv6_iface, hop_limit, ISCSI_NET_PARAM_IPV6_HOP_LIMIT); iscsi_iface_net_attr(ipv6_iface, nd_reachable_tmo, ISCSI_NET_PARAM_IPV6_ND_REACHABLE_TMO); iscsi_iface_net_attr(ipv6_iface, nd_rexmit_time, ISCSI_NET_PARAM_IPV6_ND_REXMIT_TIME); iscsi_iface_net_attr(ipv6_iface, nd_stale_tmo, ISCSI_NET_PARAM_IPV6_ND_STALE_TMO); iscsi_iface_net_attr(ipv6_iface, dup_addr_detect_cnt, ISCSI_NET_PARAM_IPV6_DUP_ADDR_DETECT_CNT); iscsi_iface_net_attr(ipv6_iface, router_adv_link_mtu, ISCSI_NET_PARAM_IPV6_RTR_ADV_LINK_MTU); /* common read only iface attribute */ iscsi_iface_net_attr(iface, enabled, ISCSI_NET_PARAM_IFACE_ENABLE); iscsi_iface_net_attr(iface, vlan_id, ISCSI_NET_PARAM_VLAN_ID); iscsi_iface_net_attr(iface, vlan_priority, ISCSI_NET_PARAM_VLAN_PRIORITY); iscsi_iface_net_attr(iface, vlan_enabled, ISCSI_NET_PARAM_VLAN_ENABLED); iscsi_iface_net_attr(iface, mtu, ISCSI_NET_PARAM_MTU); iscsi_iface_net_attr(iface, port, ISCSI_NET_PARAM_PORT); iscsi_iface_net_attr(iface, ipaddress_state, ISCSI_NET_PARAM_IPADDR_STATE); iscsi_iface_net_attr(iface, delayed_ack_en, ISCSI_NET_PARAM_DELAYED_ACK_EN); iscsi_iface_net_attr(iface, tcp_nagle_disable, ISCSI_NET_PARAM_TCP_NAGLE_DISABLE); iscsi_iface_net_attr(iface, tcp_wsf_disable, ISCSI_NET_PARAM_TCP_WSF_DISABLE); iscsi_iface_net_attr(iface, tcp_wsf, ISCSI_NET_PARAM_TCP_WSF); iscsi_iface_net_attr(iface, tcp_timer_scale, ISCSI_NET_PARAM_TCP_TIMER_SCALE); iscsi_iface_net_attr(iface, tcp_timestamp_en, ISCSI_NET_PARAM_TCP_TIMESTAMP_EN); iscsi_iface_net_attr(iface, cache_id, ISCSI_NET_PARAM_CACHE_ID); iscsi_iface_net_attr(iface, redirect_en, ISCSI_NET_PARAM_REDIRECT_EN); /* common iscsi specific settings attributes */ iscsi_iface_attr(iface, def_taskmgmt_tmo, ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO); iscsi_iface_attr(iface, header_digest, ISCSI_IFACE_PARAM_HDRDGST_EN); iscsi_iface_attr(iface, data_digest, ISCSI_IFACE_PARAM_DATADGST_EN); iscsi_iface_attr(iface, immediate_data, ISCSI_IFACE_PARAM_IMM_DATA_EN); iscsi_iface_attr(iface, 
initial_r2t, ISCSI_IFACE_PARAM_INITIAL_R2T_EN); iscsi_iface_attr(iface, data_seq_in_order, ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN); iscsi_iface_attr(iface, data_pdu_in_order, ISCSI_IFACE_PARAM_PDU_INORDER_EN); iscsi_iface_attr(iface, erl, ISCSI_IFACE_PARAM_ERL); iscsi_iface_attr(iface, max_recv_dlength, ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH); iscsi_iface_attr(iface, first_burst_len, ISCSI_IFACE_PARAM_FIRST_BURST); iscsi_iface_attr(iface, max_outstanding_r2t, ISCSI_IFACE_PARAM_MAX_R2T); iscsi_iface_attr(iface, max_burst_len, ISCSI_IFACE_PARAM_MAX_BURST); iscsi_iface_attr(iface, chap_auth, ISCSI_IFACE_PARAM_CHAP_AUTH_EN); iscsi_iface_attr(iface, bidi_chap, ISCSI_IFACE_PARAM_BIDI_CHAP_EN); iscsi_iface_attr(iface, discovery_auth_optional, ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL); iscsi_iface_attr(iface, discovery_logout, ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN); iscsi_iface_attr(iface, strict_login_comp_en, ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN); iscsi_iface_attr(iface, initiator_name, ISCSI_IFACE_PARAM_INITIATOR_NAME); static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct iscsi_transport *t = iface->transport; int param = -1; if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) param = ISCSI_IFACE_PARAM_HDRDGST_EN; else if (attr == &dev_attr_iface_data_digest.attr) param = ISCSI_IFACE_PARAM_DATADGST_EN; else if (attr == &dev_attr_iface_immediate_data.attr) param = ISCSI_IFACE_PARAM_IMM_DATA_EN; else if (attr == &dev_attr_iface_initial_r2t.attr) param = ISCSI_IFACE_PARAM_INITIAL_R2T_EN; else if (attr == &dev_attr_iface_data_seq_in_order.attr) param = ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN; else if (attr == &dev_attr_iface_data_pdu_in_order.attr) param = ISCSI_IFACE_PARAM_PDU_INORDER_EN; else if (attr == &dev_attr_iface_erl.attr) param = ISCSI_IFACE_PARAM_ERL; else if (attr == &dev_attr_iface_max_recv_dlength.attr) param = ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH; else if (attr == &dev_attr_iface_first_burst_len.attr) param = ISCSI_IFACE_PARAM_FIRST_BURST; else if (attr == &dev_attr_iface_max_outstanding_r2t.attr) param = ISCSI_IFACE_PARAM_MAX_R2T; else if (attr == &dev_attr_iface_max_burst_len.attr) param = ISCSI_IFACE_PARAM_MAX_BURST; else if (attr == &dev_attr_iface_chap_auth.attr) param = ISCSI_IFACE_PARAM_CHAP_AUTH_EN; else if (attr == &dev_attr_iface_bidi_chap.attr) param = ISCSI_IFACE_PARAM_BIDI_CHAP_EN; else if (attr == &dev_attr_iface_discovery_auth_optional.attr) param = ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL; else if (attr == &dev_attr_iface_discovery_logout.attr) param = ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN; else if (attr == &dev_attr_iface_strict_login_comp_en.attr) param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN; else if (attr == &dev_attr_iface_initiator_name.attr) param = ISCSI_IFACE_PARAM_INITIATOR_NAME; if (param != -1) return t->attr_is_visible(ISCSI_IFACE_PARAM, param); if (attr == &dev_attr_iface_enabled.attr) param = ISCSI_NET_PARAM_IFACE_ENABLE; else if (attr == &dev_attr_iface_vlan_id.attr) param = ISCSI_NET_PARAM_VLAN_ID; else if (attr == &dev_attr_iface_vlan_priority.attr) param = ISCSI_NET_PARAM_VLAN_PRIORITY; else if (attr == &dev_attr_iface_vlan_enabled.attr) param = ISCSI_NET_PARAM_VLAN_ENABLED; else if (attr == &dev_attr_iface_mtu.attr) param = ISCSI_NET_PARAM_MTU; else if (attr == &dev_attr_iface_port.attr) param = 
ISCSI_NET_PARAM_PORT; else if (attr == &dev_attr_iface_ipaddress_state.attr) param = ISCSI_NET_PARAM_IPADDR_STATE; else if (attr == &dev_attr_iface_delayed_ack_en.attr) param = ISCSI_NET_PARAM_DELAYED_ACK_EN; else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; else if (attr == &dev_attr_iface_tcp_wsf.attr) param = ISCSI_NET_PARAM_TCP_WSF; else if (attr == &dev_attr_iface_tcp_timer_scale.attr) param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; else if (attr == &dev_attr_iface_cache_id.attr) param = ISCSI_NET_PARAM_CACHE_ID; else if (attr == &dev_attr_iface_redirect_en.attr) param = ISCSI_NET_PARAM_REDIRECT_EN; else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) { if (attr == &dev_attr_ipv4_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV4_ADDR; else if (attr == &dev_attr_ipv4_iface_gateway.attr) param = ISCSI_NET_PARAM_IPV4_GW; else if (attr == &dev_attr_ipv4_iface_subnet.attr) param = ISCSI_NET_PARAM_IPV4_SUBNET; else if (attr == &dev_attr_ipv4_iface_bootproto.attr) param = ISCSI_NET_PARAM_IPV4_BOOTPROTO; else if (attr == &dev_attr_ipv4_iface_dhcp_dns_address_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_DNS_ADDR_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_slp_da_info_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_SLP_DA_EN; else if (attr == &dev_attr_ipv4_iface_tos_en.attr) param = ISCSI_NET_PARAM_IPV4_TOS_EN; else if (attr == &dev_attr_ipv4_iface_tos.attr) param = ISCSI_NET_PARAM_IPV4_TOS; else if (attr == &dev_attr_ipv4_iface_grat_arp_en.attr) param = ISCSI_NET_PARAM_IPV4_GRAT_ARP_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_alt_client_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_alt_client_id.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID; else if (attr == &dev_attr_ipv4_iface_dhcp_req_vendor_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_REQ_VENDOR_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_use_vendor_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_USE_VENDOR_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_vendor_id.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_VENDOR_ID; else if (attr == &dev_attr_ipv4_iface_dhcp_learn_iqn_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_LEARN_IQN_EN; else if (attr == &dev_attr_ipv4_iface_fragment_disable.attr) param = ISCSI_NET_PARAM_IPV4_FRAGMENT_DISABLE; else if (attr == &dev_attr_ipv4_iface_incoming_forwarding_en.attr) param = ISCSI_NET_PARAM_IPV4_IN_FORWARD_EN; else if (attr == &dev_attr_ipv4_iface_ttl.attr) param = ISCSI_NET_PARAM_IPV4_TTL; else return 0; } else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV6) { if (attr == &dev_attr_ipv6_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV6_ADDR; else if (attr == &dev_attr_ipv6_iface_link_local_addr.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL; else if (attr == &dev_attr_ipv6_iface_router_addr.attr) param = ISCSI_NET_PARAM_IPV6_ROUTER; else if (attr == &dev_attr_ipv6_iface_ipaddr_autocfg.attr) param = ISCSI_NET_PARAM_IPV6_ADDR_AUTOCFG; else if (attr == &dev_attr_ipv6_iface_link_local_autocfg.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL_AUTOCFG; else if (attr == &dev_attr_ipv6_iface_link_local_state.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL_STATE; else if (attr == &dev_attr_ipv6_iface_router_state.attr) param = ISCSI_NET_PARAM_IPV6_ROUTER_STATE; else if (attr == 
&dev_attr_ipv6_iface_grat_neighbor_adv_en.attr) param = ISCSI_NET_PARAM_IPV6_GRAT_NEIGHBOR_ADV_EN; else if (attr == &dev_attr_ipv6_iface_mld_en.attr) param = ISCSI_NET_PARAM_IPV6_MLD_EN; else if (attr == &dev_attr_ipv6_iface_flow_label.attr) param = ISCSI_NET_PARAM_IPV6_FLOW_LABEL; else if (attr == &dev_attr_ipv6_iface_traffic_class.attr) param = ISCSI_NET_PARAM_IPV6_TRAFFIC_CLASS; else if (attr == &dev_attr_ipv6_iface_hop_limit.attr) param = ISCSI_NET_PARAM_IPV6_HOP_LIMIT; else if (attr == &dev_attr_ipv6_iface_nd_reachable_tmo.attr) param = ISCSI_NET_PARAM_IPV6_ND_REACHABLE_TMO; else if (attr == &dev_attr_ipv6_iface_nd_rexmit_time.attr) param = ISCSI_NET_PARAM_IPV6_ND_REXMIT_TIME; else if (attr == &dev_attr_ipv6_iface_nd_stale_tmo.attr) param = ISCSI_NET_PARAM_IPV6_ND_STALE_TMO; else if (attr == &dev_attr_ipv6_iface_dup_addr_detect_cnt.attr) param = ISCSI_NET_PARAM_IPV6_DUP_ADDR_DETECT_CNT; else if (attr == &dev_attr_ipv6_iface_router_adv_link_mtu.attr) param = ISCSI_NET_PARAM_IPV6_RTR_ADV_LINK_MTU; else return 0; } else { WARN_ONCE(1, "Invalid iface attr"); return 0; } return t->attr_is_visible(ISCSI_NET_PARAM, param); } static struct attribute *iscsi_iface_attrs[] = { &dev_attr_iface_enabled.attr, &dev_attr_iface_vlan_id.attr, &dev_attr_iface_vlan_priority.attr, &dev_attr_iface_vlan_enabled.attr, &dev_attr_ipv4_iface_ipaddress.attr, &dev_attr_ipv4_iface_gateway.attr, &dev_attr_ipv4_iface_subnet.attr, &dev_attr_ipv4_iface_bootproto.attr, &dev_attr_ipv6_iface_ipaddress.attr, &dev_attr_ipv6_iface_link_local_addr.attr, &dev_attr_ipv6_iface_router_addr.attr, &dev_attr_ipv6_iface_ipaddr_autocfg.attr, &dev_attr_ipv6_iface_link_local_autocfg.attr, &dev_attr_iface_mtu.attr, &dev_attr_iface_port.attr, &dev_attr_iface_ipaddress_state.attr, &dev_attr_iface_delayed_ack_en.attr, &dev_attr_iface_tcp_nagle_disable.attr, &dev_attr_iface_tcp_wsf_disable.attr, &dev_attr_iface_tcp_wsf.attr, &dev_attr_iface_tcp_timer_scale.attr, &dev_attr_iface_tcp_timestamp_en.attr, &dev_attr_iface_cache_id.attr, &dev_attr_iface_redirect_en.attr, &dev_attr_iface_def_taskmgmt_tmo.attr, &dev_attr_iface_header_digest.attr, &dev_attr_iface_data_digest.attr, &dev_attr_iface_immediate_data.attr, &dev_attr_iface_initial_r2t.attr, &dev_attr_iface_data_seq_in_order.attr, &dev_attr_iface_data_pdu_in_order.attr, &dev_attr_iface_erl.attr, &dev_attr_iface_max_recv_dlength.attr, &dev_attr_iface_first_burst_len.attr, &dev_attr_iface_max_outstanding_r2t.attr, &dev_attr_iface_max_burst_len.attr, &dev_attr_iface_chap_auth.attr, &dev_attr_iface_bidi_chap.attr, &dev_attr_iface_discovery_auth_optional.attr, &dev_attr_iface_discovery_logout.attr, &dev_attr_iface_strict_login_comp_en.attr, &dev_attr_iface_initiator_name.attr, &dev_attr_ipv4_iface_dhcp_dns_address_en.attr, &dev_attr_ipv4_iface_dhcp_slp_da_info_en.attr, &dev_attr_ipv4_iface_tos_en.attr, &dev_attr_ipv4_iface_tos.attr, &dev_attr_ipv4_iface_grat_arp_en.attr, &dev_attr_ipv4_iface_dhcp_alt_client_id_en.attr, &dev_attr_ipv4_iface_dhcp_alt_client_id.attr, &dev_attr_ipv4_iface_dhcp_req_vendor_id_en.attr, &dev_attr_ipv4_iface_dhcp_use_vendor_id_en.attr, &dev_attr_ipv4_iface_dhcp_vendor_id.attr, &dev_attr_ipv4_iface_dhcp_learn_iqn_en.attr, &dev_attr_ipv4_iface_fragment_disable.attr, &dev_attr_ipv4_iface_incoming_forwarding_en.attr, &dev_attr_ipv4_iface_ttl.attr, &dev_attr_ipv6_iface_link_local_state.attr, &dev_attr_ipv6_iface_router_state.attr, &dev_attr_ipv6_iface_grat_neighbor_adv_en.attr, &dev_attr_ipv6_iface_mld_en.attr, &dev_attr_ipv6_iface_flow_label.attr, 
&dev_attr_ipv6_iface_traffic_class.attr, &dev_attr_ipv6_iface_hop_limit.attr, &dev_attr_ipv6_iface_nd_reachable_tmo.attr, &dev_attr_ipv6_iface_nd_rexmit_time.attr, &dev_attr_ipv6_iface_nd_stale_tmo.attr, &dev_attr_ipv6_iface_dup_addr_detect_cnt.attr, &dev_attr_ipv6_iface_router_adv_link_mtu.attr, NULL, }; static struct attribute_group iscsi_iface_group = { .attrs = iscsi_iface_attrs, .is_visible = iscsi_iface_attr_is_visible, }; /* convert iscsi_ipaddress_state values to ascii string name */ static const struct { enum iscsi_ipaddress_state value; char *name; } iscsi_ipaddress_state_names[] = { {ISCSI_IPDDRESS_STATE_UNCONFIGURED, "Unconfigured" }, {ISCSI_IPDDRESS_STATE_ACQUIRING, "Acquiring" }, {ISCSI_IPDDRESS_STATE_TENTATIVE, "Tentative" }, {ISCSI_IPDDRESS_STATE_VALID, "Valid" }, {ISCSI_IPDDRESS_STATE_DISABLING, "Disabling" }, {ISCSI_IPDDRESS_STATE_INVALID, "Invalid" }, {ISCSI_IPDDRESS_STATE_DEPRECATED, "Deprecated" }, }; char *iscsi_get_ipaddress_state_name(enum iscsi_ipaddress_state port_state) { int i; char *state = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_ipaddress_state_names); i++) { if (iscsi_ipaddress_state_names[i].value == port_state) { state = iscsi_ipaddress_state_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_ipaddress_state_name); /* convert iscsi_router_state values to ascii string name */ static const struct { enum iscsi_router_state value; char *name; } iscsi_router_state_names[] = { {ISCSI_ROUTER_STATE_UNKNOWN, "Unknown" }, {ISCSI_ROUTER_STATE_ADVERTISED, "Advertised" }, {ISCSI_ROUTER_STATE_MANUAL, "Manual" }, {ISCSI_ROUTER_STATE_STALE, "Stale" }, }; char *iscsi_get_router_state_name(enum iscsi_router_state router_state) { int i; char *state = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_router_state_names); i++) { if (iscsi_router_state_names[i].value == router_state) { state = iscsi_router_state_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_router_state_name); struct iscsi_iface * iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, uint32_t iface_type, uint32_t iface_num, int dd_size) { struct iscsi_iface *iface; int err; iface = kzalloc(sizeof(*iface) + dd_size, GFP_KERNEL); if (!iface) return NULL; iface->transport = transport; iface->iface_type = iface_type; iface->iface_num = iface_num; iface->dev.release = iscsi_iface_release; iface->dev.class = &iscsi_iface_class; /* parent reference released in iscsi_iface_release */ iface->dev.parent = get_device(&shost->shost_gendev); if (iface_type == ISCSI_IFACE_TYPE_IPV4) dev_set_name(&iface->dev, "ipv4-iface-%u-%u", shost->host_no, iface_num); else dev_set_name(&iface->dev, "ipv6-iface-%u-%u", shost->host_no, iface_num); err = device_register(&iface->dev); if (err) goto put_dev; err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); if (err) goto unreg_iface; if (dd_size) iface->dd_data = &iface[1]; return iface; unreg_iface: device_unregister(&iface->dev); return NULL; put_dev: put_device(&iface->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_iface); void iscsi_destroy_iface(struct iscsi_iface *iface) { sysfs_remove_group(&iface->dev.kobj, &iscsi_iface_group); device_unregister(&iface->dev); } EXPORT_SYMBOL_GPL(iscsi_destroy_iface); /* * Interface to display flash node params to sysfs */ #define ISCSI_FLASHNODE_ATTR(_prefix, _name, _mode, _show, _store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name, _mode, _show, _store) /* flash node session attrs show */ #define iscsi_flashnode_sess_attr_show(type, name, param) \ 
static ssize_t \ show_##type##_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct iscsi_bus_flash_session *fnode_sess = \ iscsi_dev_to_flash_session(dev);\ struct iscsi_transport *t = fnode_sess->transport; \ return t->get_flashnode_param(fnode_sess, param, buf); \ } \ #define iscsi_flashnode_sess_attr(type, name, param) \ iscsi_flashnode_sess_attr_show(type, name, param) \ static ISCSI_FLASHNODE_ATTR(type, name, S_IRUGO, \ show_##type##_##name, NULL); /* Flash node session attributes */ iscsi_flashnode_sess_attr(fnode, auto_snd_tgt_disable, ISCSI_FLASHNODE_AUTO_SND_TGT_DISABLE); iscsi_flashnode_sess_attr(fnode, discovery_session, ISCSI_FLASHNODE_DISCOVERY_SESS); iscsi_flashnode_sess_attr(fnode, portal_type, ISCSI_FLASHNODE_PORTAL_TYPE); iscsi_flashnode_sess_attr(fnode, entry_enable, ISCSI_FLASHNODE_ENTRY_EN); iscsi_flashnode_sess_attr(fnode, immediate_data, ISCSI_FLASHNODE_IMM_DATA_EN); iscsi_flashnode_sess_attr(fnode, initial_r2t, ISCSI_FLASHNODE_INITIAL_R2T_EN); iscsi_flashnode_sess_attr(fnode, data_seq_in_order, ISCSI_FLASHNODE_DATASEQ_INORDER); iscsi_flashnode_sess_attr(fnode, data_pdu_in_order, ISCSI_FLASHNODE_PDU_INORDER); iscsi_flashnode_sess_attr(fnode, chap_auth, ISCSI_FLASHNODE_CHAP_AUTH_EN); iscsi_flashnode_sess_attr(fnode, discovery_logout, ISCSI_FLASHNODE_DISCOVERY_LOGOUT_EN); iscsi_flashnode_sess_attr(fnode, bidi_chap, ISCSI_FLASHNODE_BIDI_CHAP_EN); iscsi_flashnode_sess_attr(fnode, discovery_auth_optional, ISCSI_FLASHNODE_DISCOVERY_AUTH_OPTIONAL); iscsi_flashnode_sess_attr(fnode, erl, ISCSI_FLASHNODE_ERL); iscsi_flashnode_sess_attr(fnode, first_burst_len, ISCSI_FLASHNODE_FIRST_BURST); iscsi_flashnode_sess_attr(fnode, def_time2wait, ISCSI_FLASHNODE_DEF_TIME2WAIT); iscsi_flashnode_sess_attr(fnode, def_time2retain, ISCSI_FLASHNODE_DEF_TIME2RETAIN); iscsi_flashnode_sess_attr(fnode, max_outstanding_r2t, ISCSI_FLASHNODE_MAX_R2T); iscsi_flashnode_sess_attr(fnode, isid, ISCSI_FLASHNODE_ISID); iscsi_flashnode_sess_attr(fnode, tsid, ISCSI_FLASHNODE_TSID); iscsi_flashnode_sess_attr(fnode, max_burst_len, ISCSI_FLASHNODE_MAX_BURST); iscsi_flashnode_sess_attr(fnode, def_taskmgmt_tmo, ISCSI_FLASHNODE_DEF_TASKMGMT_TMO); iscsi_flashnode_sess_attr(fnode, targetalias, ISCSI_FLASHNODE_ALIAS); iscsi_flashnode_sess_attr(fnode, targetname, ISCSI_FLASHNODE_NAME); iscsi_flashnode_sess_attr(fnode, tpgt, ISCSI_FLASHNODE_TPGT); iscsi_flashnode_sess_attr(fnode, discovery_parent_idx, ISCSI_FLASHNODE_DISCOVERY_PARENT_IDX); iscsi_flashnode_sess_attr(fnode, discovery_parent_type, ISCSI_FLASHNODE_DISCOVERY_PARENT_TYPE); iscsi_flashnode_sess_attr(fnode, chap_in_idx, ISCSI_FLASHNODE_CHAP_IN_IDX); iscsi_flashnode_sess_attr(fnode, chap_out_idx, ISCSI_FLASHNODE_CHAP_OUT_IDX); iscsi_flashnode_sess_attr(fnode, username, ISCSI_FLASHNODE_USERNAME); iscsi_flashnode_sess_attr(fnode, username_in, ISCSI_FLASHNODE_USERNAME_IN); iscsi_flashnode_sess_attr(fnode, password, ISCSI_FLASHNODE_PASSWORD); iscsi_flashnode_sess_attr(fnode, password_in, ISCSI_FLASHNODE_PASSWORD_IN); iscsi_flashnode_sess_attr(fnode, is_boot_target, ISCSI_FLASHNODE_IS_BOOT_TGT); static struct attribute *iscsi_flashnode_sess_attrs[] = { &dev_attr_fnode_auto_snd_tgt_disable.attr, &dev_attr_fnode_discovery_session.attr, &dev_attr_fnode_portal_type.attr, &dev_attr_fnode_entry_enable.attr, &dev_attr_fnode_immediate_data.attr, &dev_attr_fnode_initial_r2t.attr, &dev_attr_fnode_data_seq_in_order.attr, &dev_attr_fnode_data_pdu_in_order.attr, &dev_attr_fnode_chap_auth.attr, &dev_attr_fnode_discovery_logout.attr, 
&dev_attr_fnode_bidi_chap.attr, &dev_attr_fnode_discovery_auth_optional.attr, &dev_attr_fnode_erl.attr, &dev_attr_fnode_first_burst_len.attr, &dev_attr_fnode_def_time2wait.attr, &dev_attr_fnode_def_time2retain.attr, &dev_attr_fnode_max_outstanding_r2t.attr, &dev_attr_fnode_isid.attr, &dev_attr_fnode_tsid.attr, &dev_attr_fnode_max_burst_len.attr, &dev_attr_fnode_def_taskmgmt_tmo.attr, &dev_attr_fnode_targetalias.attr, &dev_attr_fnode_targetname.attr, &dev_attr_fnode_tpgt.attr, &dev_attr_fnode_discovery_parent_idx.attr, &dev_attr_fnode_discovery_parent_type.attr, &dev_attr_fnode_chap_in_idx.attr, &dev_attr_fnode_chap_out_idx.attr, &dev_attr_fnode_username.attr, &dev_attr_fnode_username_in.attr, &dev_attr_fnode_password.attr, &dev_attr_fnode_password_in.attr, &dev_attr_fnode_is_boot_target.attr, NULL, }; static umode_t iscsi_flashnode_sess_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_bus_flash_session *fnode_sess = iscsi_dev_to_flash_session(dev); struct iscsi_transport *t = fnode_sess->transport; int param; if (attr == &dev_attr_fnode_auto_snd_tgt_disable.attr) { param = ISCSI_FLASHNODE_AUTO_SND_TGT_DISABLE; } else if (attr == &dev_attr_fnode_discovery_session.attr) { param = ISCSI_FLASHNODE_DISCOVERY_SESS; } else if (attr == &dev_attr_fnode_portal_type.attr) { param = ISCSI_FLASHNODE_PORTAL_TYPE; } else if (attr == &dev_attr_fnode_entry_enable.attr) { param = ISCSI_FLASHNODE_ENTRY_EN; } else if (attr == &dev_attr_fnode_immediate_data.attr) { param = ISCSI_FLASHNODE_IMM_DATA_EN; } else if (attr == &dev_attr_fnode_initial_r2t.attr) { param = ISCSI_FLASHNODE_INITIAL_R2T_EN; } else if (attr == &dev_attr_fnode_data_seq_in_order.attr) { param = ISCSI_FLASHNODE_DATASEQ_INORDER; } else if (attr == &dev_attr_fnode_data_pdu_in_order.attr) { param = ISCSI_FLASHNODE_PDU_INORDER; } else if (attr == &dev_attr_fnode_chap_auth.attr) { param = ISCSI_FLASHNODE_CHAP_AUTH_EN; } else if (attr == &dev_attr_fnode_discovery_logout.attr) { param = ISCSI_FLASHNODE_DISCOVERY_LOGOUT_EN; } else if (attr == &dev_attr_fnode_bidi_chap.attr) { param = ISCSI_FLASHNODE_BIDI_CHAP_EN; } else if (attr == &dev_attr_fnode_discovery_auth_optional.attr) { param = ISCSI_FLASHNODE_DISCOVERY_AUTH_OPTIONAL; } else if (attr == &dev_attr_fnode_erl.attr) { param = ISCSI_FLASHNODE_ERL; } else if (attr == &dev_attr_fnode_first_burst_len.attr) { param = ISCSI_FLASHNODE_FIRST_BURST; } else if (attr == &dev_attr_fnode_def_time2wait.attr) { param = ISCSI_FLASHNODE_DEF_TIME2WAIT; } else if (attr == &dev_attr_fnode_def_time2retain.attr) { param = ISCSI_FLASHNODE_DEF_TIME2RETAIN; } else if (attr == &dev_attr_fnode_max_outstanding_r2t.attr) { param = ISCSI_FLASHNODE_MAX_R2T; } else if (attr == &dev_attr_fnode_isid.attr) { param = ISCSI_FLASHNODE_ISID; } else if (attr == &dev_attr_fnode_tsid.attr) { param = ISCSI_FLASHNODE_TSID; } else if (attr == &dev_attr_fnode_max_burst_len.attr) { param = ISCSI_FLASHNODE_MAX_BURST; } else if (attr == &dev_attr_fnode_def_taskmgmt_tmo.attr) { param = ISCSI_FLASHNODE_DEF_TASKMGMT_TMO; } else if (attr == &dev_attr_fnode_targetalias.attr) { param = ISCSI_FLASHNODE_ALIAS; } else if (attr == &dev_attr_fnode_targetname.attr) { param = ISCSI_FLASHNODE_NAME; } else if (attr == &dev_attr_fnode_tpgt.attr) { param = ISCSI_FLASHNODE_TPGT; } else if (attr == &dev_attr_fnode_discovery_parent_idx.attr) { param = ISCSI_FLASHNODE_DISCOVERY_PARENT_IDX; } else if (attr == &dev_attr_fnode_discovery_parent_type.attr) { param = 
ISCSI_FLASHNODE_DISCOVERY_PARENT_TYPE; } else if (attr == &dev_attr_fnode_chap_in_idx.attr) { param = ISCSI_FLASHNODE_CHAP_IN_IDX; } else if (attr == &dev_attr_fnode_chap_out_idx.attr) { param = ISCSI_FLASHNODE_CHAP_OUT_IDX; } else if (attr == &dev_attr_fnode_username.attr) { param = ISCSI_FLASHNODE_USERNAME; } else if (attr == &dev_attr_fnode_username_in.attr) { param = ISCSI_FLASHNODE_USERNAME_IN; } else if (attr == &dev_attr_fnode_password.attr) { param = ISCSI_FLASHNODE_PASSWORD; } else if (attr == &dev_attr_fnode_password_in.attr) { param = ISCSI_FLASHNODE_PASSWORD_IN; } else if (attr == &dev_attr_fnode_is_boot_target.attr) { param = ISCSI_FLASHNODE_IS_BOOT_TGT; } else { WARN_ONCE(1, "Invalid flashnode session attr"); return 0; } return t->attr_is_visible(ISCSI_FLASHNODE_PARAM, param); } static struct attribute_group iscsi_flashnode_sess_attr_group = { .attrs = iscsi_flashnode_sess_attrs, .is_visible = iscsi_flashnode_sess_attr_is_visible, }; static const struct attribute_group *iscsi_flashnode_sess_attr_groups[] = { &iscsi_flashnode_sess_attr_group, NULL, }; static void iscsi_flashnode_sess_release(struct device *dev) { struct iscsi_bus_flash_session *fnode_sess = iscsi_dev_to_flash_session(dev); kfree(fnode_sess->targetname); kfree(fnode_sess->targetalias); kfree(fnode_sess->portal_type); kfree(fnode_sess); } static const struct device_type iscsi_flashnode_sess_dev_type = { .name = "iscsi_flashnode_sess_dev_type", .groups = iscsi_flashnode_sess_attr_groups, .release = iscsi_flashnode_sess_release, }; /* flash node connection attrs show */ #define iscsi_flashnode_conn_attr_show(type, name, param) \ static ssize_t \ show_##type##_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev);\ struct iscsi_bus_flash_session *fnode_sess = \ iscsi_flash_conn_to_flash_session(fnode_conn);\ struct iscsi_transport *t = fnode_conn->transport; \ return t->get_flashnode_param(fnode_sess, param, buf); \ } \ #define iscsi_flashnode_conn_attr(type, name, param) \ iscsi_flashnode_conn_attr_show(type, name, param) \ static ISCSI_FLASHNODE_ATTR(type, name, S_IRUGO, \ show_##type##_##name, NULL); /* Flash node connection attributes */ iscsi_flashnode_conn_attr(fnode, is_fw_assigned_ipv6, ISCSI_FLASHNODE_IS_FW_ASSIGNED_IPV6); iscsi_flashnode_conn_attr(fnode, header_digest, ISCSI_FLASHNODE_HDR_DGST_EN); iscsi_flashnode_conn_attr(fnode, data_digest, ISCSI_FLASHNODE_DATA_DGST_EN); iscsi_flashnode_conn_attr(fnode, snack_req, ISCSI_FLASHNODE_SNACK_REQ_EN); iscsi_flashnode_conn_attr(fnode, tcp_timestamp_stat, ISCSI_FLASHNODE_TCP_TIMESTAMP_STAT); iscsi_flashnode_conn_attr(fnode, tcp_nagle_disable, ISCSI_FLASHNODE_TCP_NAGLE_DISABLE); iscsi_flashnode_conn_attr(fnode, tcp_wsf_disable, ISCSI_FLASHNODE_TCP_WSF_DISABLE); iscsi_flashnode_conn_attr(fnode, tcp_timer_scale, ISCSI_FLASHNODE_TCP_TIMER_SCALE); iscsi_flashnode_conn_attr(fnode, tcp_timestamp_enable, ISCSI_FLASHNODE_TCP_TIMESTAMP_EN); iscsi_flashnode_conn_attr(fnode, fragment_disable, ISCSI_FLASHNODE_IP_FRAG_DISABLE); iscsi_flashnode_conn_attr(fnode, keepalive_tmo, ISCSI_FLASHNODE_KEEPALIVE_TMO); iscsi_flashnode_conn_attr(fnode, port, ISCSI_FLASHNODE_PORT); iscsi_flashnode_conn_attr(fnode, ipaddress, ISCSI_FLASHNODE_IPADDR); iscsi_flashnode_conn_attr(fnode, max_recv_dlength, ISCSI_FLASHNODE_MAX_RECV_DLENGTH); iscsi_flashnode_conn_attr(fnode, max_xmit_dlength, ISCSI_FLASHNODE_MAX_XMIT_DLENGTH); iscsi_flashnode_conn_attr(fnode, local_port, ISCSI_FLASHNODE_LOCAL_PORT); 
iscsi_flashnode_conn_attr(fnode, ipv4_tos, ISCSI_FLASHNODE_IPV4_TOS); iscsi_flashnode_conn_attr(fnode, ipv6_traffic_class, ISCSI_FLASHNODE_IPV6_TC); iscsi_flashnode_conn_attr(fnode, ipv6_flow_label, ISCSI_FLASHNODE_IPV6_FLOW_LABEL); iscsi_flashnode_conn_attr(fnode, redirect_ipaddr, ISCSI_FLASHNODE_REDIRECT_IPADDR); iscsi_flashnode_conn_attr(fnode, max_segment_size, ISCSI_FLASHNODE_MAX_SEGMENT_SIZE); iscsi_flashnode_conn_attr(fnode, link_local_ipv6, ISCSI_FLASHNODE_LINK_LOCAL_IPV6); iscsi_flashnode_conn_attr(fnode, tcp_xmit_wsf, ISCSI_FLASHNODE_TCP_XMIT_WSF); iscsi_flashnode_conn_attr(fnode, tcp_recv_wsf, ISCSI_FLASHNODE_TCP_RECV_WSF); iscsi_flashnode_conn_attr(fnode, statsn, ISCSI_FLASHNODE_STATSN); iscsi_flashnode_conn_attr(fnode, exp_statsn, ISCSI_FLASHNODE_EXP_STATSN); static struct attribute *iscsi_flashnode_conn_attrs[] = { &dev_attr_fnode_is_fw_assigned_ipv6.attr, &dev_attr_fnode_header_digest.attr, &dev_attr_fnode_data_digest.attr, &dev_attr_fnode_snack_req.attr, &dev_attr_fnode_tcp_timestamp_stat.attr, &dev_attr_fnode_tcp_nagle_disable.attr, &dev_attr_fnode_tcp_wsf_disable.attr, &dev_attr_fnode_tcp_timer_scale.attr, &dev_attr_fnode_tcp_timestamp_enable.attr, &dev_attr_fnode_fragment_disable.attr, &dev_attr_fnode_max_recv_dlength.attr, &dev_attr_fnode_max_xmit_dlength.attr, &dev_attr_fnode_keepalive_tmo.attr, &dev_attr_fnode_port.attr, &dev_attr_fnode_ipaddress.attr, &dev_attr_fnode_redirect_ipaddr.attr, &dev_attr_fnode_max_segment_size.attr, &dev_attr_fnode_local_port.attr, &dev_attr_fnode_ipv4_tos.attr, &dev_attr_fnode_ipv6_traffic_class.attr, &dev_attr_fnode_ipv6_flow_label.attr, &dev_attr_fnode_link_local_ipv6.attr, &dev_attr_fnode_tcp_xmit_wsf.attr, &dev_attr_fnode_tcp_recv_wsf.attr, &dev_attr_fnode_statsn.attr, &dev_attr_fnode_exp_statsn.attr, NULL, }; static umode_t iscsi_flashnode_conn_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev); struct iscsi_transport *t = fnode_conn->transport; int param; if (attr == &dev_attr_fnode_is_fw_assigned_ipv6.attr) { param = ISCSI_FLASHNODE_IS_FW_ASSIGNED_IPV6; } else if (attr == &dev_attr_fnode_header_digest.attr) { param = ISCSI_FLASHNODE_HDR_DGST_EN; } else if (attr == &dev_attr_fnode_data_digest.attr) { param = ISCSI_FLASHNODE_DATA_DGST_EN; } else if (attr == &dev_attr_fnode_snack_req.attr) { param = ISCSI_FLASHNODE_SNACK_REQ_EN; } else if (attr == &dev_attr_fnode_tcp_timestamp_stat.attr) { param = ISCSI_FLASHNODE_TCP_TIMESTAMP_STAT; } else if (attr == &dev_attr_fnode_tcp_nagle_disable.attr) { param = ISCSI_FLASHNODE_TCP_NAGLE_DISABLE; } else if (attr == &dev_attr_fnode_tcp_wsf_disable.attr) { param = ISCSI_FLASHNODE_TCP_WSF_DISABLE; } else if (attr == &dev_attr_fnode_tcp_timer_scale.attr) { param = ISCSI_FLASHNODE_TCP_TIMER_SCALE; } else if (attr == &dev_attr_fnode_tcp_timestamp_enable.attr) { param = ISCSI_FLASHNODE_TCP_TIMESTAMP_EN; } else if (attr == &dev_attr_fnode_fragment_disable.attr) { param = ISCSI_FLASHNODE_IP_FRAG_DISABLE; } else if (attr == &dev_attr_fnode_max_recv_dlength.attr) { param = ISCSI_FLASHNODE_MAX_RECV_DLENGTH; } else if (attr == &dev_attr_fnode_max_xmit_dlength.attr) { param = ISCSI_FLASHNODE_MAX_XMIT_DLENGTH; } else if (attr == &dev_attr_fnode_keepalive_tmo.attr) { param = ISCSI_FLASHNODE_KEEPALIVE_TMO; } else if (attr == &dev_attr_fnode_port.attr) { param = ISCSI_FLASHNODE_PORT; } else if (attr == &dev_attr_fnode_ipaddress.attr) { param = 
ISCSI_FLASHNODE_IPADDR; } else if (attr == &dev_attr_fnode_redirect_ipaddr.attr) { param = ISCSI_FLASHNODE_REDIRECT_IPADDR; } else if (attr == &dev_attr_fnode_max_segment_size.attr) { param = ISCSI_FLASHNODE_MAX_SEGMENT_SIZE; } else if (attr == &dev_attr_fnode_local_port.attr) { param = ISCSI_FLASHNODE_LOCAL_PORT; } else if (attr == &dev_attr_fnode_ipv4_tos.attr) { param = ISCSI_FLASHNODE_IPV4_TOS; } else if (attr == &dev_attr_fnode_ipv6_traffic_class.attr) { param = ISCSI_FLASHNODE_IPV6_TC; } else if (attr == &dev_attr_fnode_ipv6_flow_label.attr) { param = ISCSI_FLASHNODE_IPV6_FLOW_LABEL; } else if (attr == &dev_attr_fnode_link_local_ipv6.attr) { param = ISCSI_FLASHNODE_LINK_LOCAL_IPV6; } else if (attr == &dev_attr_fnode_tcp_xmit_wsf.attr) { param = ISCSI_FLASHNODE_TCP_XMIT_WSF; } else if (attr == &dev_attr_fnode_tcp_recv_wsf.attr) { param = ISCSI_FLASHNODE_TCP_RECV_WSF; } else if (attr == &dev_attr_fnode_statsn.attr) { param = ISCSI_FLASHNODE_STATSN; } else if (attr == &dev_attr_fnode_exp_statsn.attr) { param = ISCSI_FLASHNODE_EXP_STATSN; } else { WARN_ONCE(1, "Invalid flashnode connection attr"); return 0; } return t->attr_is_visible(ISCSI_FLASHNODE_PARAM, param); } static struct attribute_group iscsi_flashnode_conn_attr_group = { .attrs = iscsi_flashnode_conn_attrs, .is_visible = iscsi_flashnode_conn_attr_is_visible, }; static const struct attribute_group *iscsi_flashnode_conn_attr_groups[] = { &iscsi_flashnode_conn_attr_group, NULL, }; static void iscsi_flashnode_conn_release(struct device *dev) { struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev); kfree(fnode_conn->ipaddress); kfree(fnode_conn->redirect_ipaddr); kfree(fnode_conn->link_local_ipv6_addr); kfree(fnode_conn); } static const struct device_type iscsi_flashnode_conn_dev_type = { .name = "iscsi_flashnode_conn_dev_type", .groups = iscsi_flashnode_conn_attr_groups, .release = iscsi_flashnode_conn_release, }; static struct bus_type iscsi_flashnode_bus; int iscsi_flashnode_bus_match(struct device *dev, struct device_driver *drv) { if (dev->bus == &iscsi_flashnode_bus) return 1; return 0; } EXPORT_SYMBOL_GPL(iscsi_flashnode_bus_match); static struct bus_type iscsi_flashnode_bus = { .name = "iscsi_flashnode", .match = &iscsi_flashnode_bus_match, }; /** * iscsi_create_flashnode_sess - Add flashnode session entry in sysfs * @shost: pointer to host data * @index: index of flashnode to add in sysfs * @transport: pointer to transport data * @dd_size: total size to allocate * * Adds a sysfs entry for the flashnode session attributes * * Returns: * pointer to allocated flashnode sess on success * %NULL on failure */ struct iscsi_bus_flash_session * iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index, struct iscsi_transport *transport, int dd_size) { struct iscsi_bus_flash_session *fnode_sess; int err; fnode_sess = kzalloc(sizeof(*fnode_sess) + dd_size, GFP_KERNEL); if (!fnode_sess) return NULL; fnode_sess->transport = transport; fnode_sess->target_id = index; fnode_sess->dev.type = &iscsi_flashnode_sess_dev_type; fnode_sess->dev.bus = &iscsi_flashnode_bus; fnode_sess->dev.parent = &shost->shost_gendev; dev_set_name(&fnode_sess->dev, "flashnode_sess-%u:%u", shost->host_no, index); err = device_register(&fnode_sess->dev); if (err) goto put_dev; if (dd_size) fnode_sess->dd_data = &fnode_sess[1]; return fnode_sess; put_dev: put_device(&fnode_sess->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess); /** * iscsi_create_flashnode_conn - Add flashnode conn entry in sysfs * @shost: pointer to host 
data * @fnode_sess: pointer to the parent flashnode session entry * @transport: pointer to transport data * @dd_size: total size to allocate * * Adds a sysfs entry for the flashnode connection attributes * * Returns: * pointer to allocated flashnode conn on success * %NULL on failure */ struct iscsi_bus_flash_conn * iscsi_create_flashnode_conn(struct Scsi_Host *shost, struct iscsi_bus_flash_session *fnode_sess, struct iscsi_transport *transport, int dd_size) { struct iscsi_bus_flash_conn *fnode_conn; int err; fnode_conn = kzalloc(sizeof(*fnode_conn) + dd_size, GFP_KERNEL); if (!fnode_conn) return NULL; fnode_conn->transport = transport; fnode_conn->dev.type = &iscsi_flashnode_conn_dev_type; fnode_conn->dev.bus = &iscsi_flashnode_bus; fnode_conn->dev.parent = &fnode_sess->dev; dev_set_name(&fnode_conn->dev, "flashnode_conn-%u:%u:0", shost->host_no, fnode_sess->target_id); err = device_register(&fnode_conn->dev); if (err) goto put_dev; if (dd_size) fnode_conn->dd_data = &fnode_conn[1]; return fnode_conn; put_dev: put_device(&fnode_conn->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); /** * iscsi_is_flashnode_conn_dev - verify passed device is to be flashnode conn * @dev: device to verify * @data: pointer to data containing value to use for verification * * Verifies if the passed device is flashnode conn device * * Returns: * 1 on success * 0 on failure */ static int iscsi_is_flashnode_conn_dev(struct device *dev, void *data) { return dev->bus == &iscsi_flashnode_bus; } static int iscsi_destroy_flashnode_conn(struct iscsi_bus_flash_conn *fnode_conn) { device_unregister(&fnode_conn->dev); return 0; } static int flashnode_match_index(struct device *dev, void *data) { struct iscsi_bus_flash_session *fnode_sess = NULL; int ret = 0; if (!iscsi_flashnode_bus_match(dev, NULL)) goto exit_match_index; fnode_sess = iscsi_dev_to_flash_session(dev); ret = (fnode_sess->target_id == *((int *)data)) ? 
1 : 0; exit_match_index: return ret; } /** * iscsi_get_flashnode_by_index -finds flashnode session entry by index * @shost: pointer to host data * @idx: index to match * * Finds the flashnode session object for the passed index * * Returns: * pointer to found flashnode session object on success * %NULL on failure */ static struct iscsi_bus_flash_session * iscsi_get_flashnode_by_index(struct Scsi_Host *shost, uint32_t idx) { struct iscsi_bus_flash_session *fnode_sess = NULL; struct device *dev; dev = device_find_child(&shost->shost_gendev, &idx, flashnode_match_index); if (dev) fnode_sess = iscsi_dev_to_flash_session(dev); return fnode_sess; } /** * iscsi_find_flashnode_sess - finds flashnode session entry * @shost: pointer to host data * @data: pointer to data containing value to use for comparison * @fn: function pointer that does actual comparison * * Finds the flashnode session object comparing the data passed using logic * defined in passed function pointer * * Returns: * pointer to found flashnode session device object on success * %NULL on failure */ struct device * iscsi_find_flashnode_sess(struct Scsi_Host *shost, void *data, int (*fn)(struct device *dev, void *data)) { return device_find_child(&shost->shost_gendev, data, fn); } EXPORT_SYMBOL_GPL(iscsi_find_flashnode_sess); /** * iscsi_find_flashnode_conn - finds flashnode connection entry * @fnode_sess: pointer to parent flashnode session entry * * Finds the flashnode connection object comparing the data passed using logic * defined in passed function pointer * * Returns: * pointer to found flashnode connection device object on success * %NULL on failure */ struct device * iscsi_find_flashnode_conn(struct iscsi_bus_flash_session *fnode_sess) { return device_find_child(&fnode_sess->dev, NULL, iscsi_is_flashnode_conn_dev); } EXPORT_SYMBOL_GPL(iscsi_find_flashnode_conn); static int iscsi_iter_destroy_flashnode_conn_fn(struct device *dev, void *data) { if (!iscsi_is_flashnode_conn_dev(dev, NULL)) return 0; return iscsi_destroy_flashnode_conn(iscsi_dev_to_flash_conn(dev)); } /** * iscsi_destroy_flashnode_sess - destroy flashnode session entry * @fnode_sess: pointer to flashnode session entry to be destroyed * * Deletes the flashnode session entry and all children flashnode connection * entries from sysfs */ void iscsi_destroy_flashnode_sess(struct iscsi_bus_flash_session *fnode_sess) { int err; err = device_for_each_child(&fnode_sess->dev, NULL, iscsi_iter_destroy_flashnode_conn_fn); if (err) pr_err("Could not delete all connections for %s. Error %d.\n", fnode_sess->dev.kobj.name, err); device_unregister(&fnode_sess->dev); } EXPORT_SYMBOL_GPL(iscsi_destroy_flashnode_sess); static int iscsi_iter_destroy_flashnode_fn(struct device *dev, void *data) { if (!iscsi_flashnode_bus_match(dev, NULL)) return 0; iscsi_destroy_flashnode_sess(iscsi_dev_to_flash_session(dev)); return 0; } /** * iscsi_destroy_all_flashnode - destroy all flashnode session entries * @shost: pointer to host data * * Destroys all the flashnode session entries and all corresponding children * flashnode connection entries from sysfs */ void iscsi_destroy_all_flashnode(struct Scsi_Host *shost) { device_for_each_child(&shost->shost_gendev, NULL, iscsi_iter_destroy_flashnode_fn); } EXPORT_SYMBOL_GPL(iscsi_destroy_all_flashnode); /* * BSG support */ /** * iscsi_bsg_host_dispatch - Dispatch command to LLD. 
* @job: bsg job to be processed */ static int iscsi_bsg_host_dispatch(struct bsg_job *job) { struct Scsi_Host *shost = iscsi_job_to_shost(job); struct iscsi_bsg_request *req = job->request; struct iscsi_bsg_reply *reply = job->reply; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); int cmdlen = sizeof(uint32_t); /* start with length of msgcode */ int ret; /* check if we have the msgcode value at least */ if (job->request_len < sizeof(uint32_t)) { ret = -ENOMSG; goto fail_host_msg; } /* Validate the host command */ switch (req->msgcode) { case ISCSI_BSG_HST_VENDOR: cmdlen += sizeof(struct iscsi_bsg_host_vendor); if ((shost->hostt->vendor_id == 0L) || (req->rqst_data.h_vendor.vendor_id != shost->hostt->vendor_id)) { ret = -ESRCH; goto fail_host_msg; } break; default: ret = -EBADR; goto fail_host_msg; } /* check if we really have all the request data needed */ if (job->request_len < cmdlen) { ret = -ENOMSG; goto fail_host_msg; } ret = i->iscsi_transport->bsg_request(job); if (!ret) return 0; fail_host_msg: /* return the errno failure code as the only status */ BUG_ON(job->reply_len < sizeof(uint32_t)); reply->reply_payload_rcv_len = 0; reply->result = ret; job->reply_len = sizeof(uint32_t); bsg_job_done(job, ret, 0); return 0; } /** * iscsi_bsg_host_add - Create and add the bsg hooks to receive requests * @shost: shost for iscsi_host * @ihost: iscsi_cls_host adding the structures to */ static int iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) { struct device *dev = &shost->shost_gendev; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); struct request_queue *q; char bsg_name[20]; if (!i->iscsi_transport->bsg_request) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); return PTR_ERR(q); } __scsi_init_queue(shost, q); ihost->bsg_q = q; return 0; } static int iscsi_setup_host(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; memset(ihost, 0, sizeof(*ihost)); mutex_init(&ihost->mutex); iscsi_bsg_host_add(shost, ihost); /* ignore any bsg add error - we just can't do sgio */ return 0; } static int iscsi_remove_host(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; bsg_remove_queue(ihost->bsg_q); return 0; } static DECLARE_TRANSPORT_CLASS(iscsi_host_class, "iscsi_host", iscsi_setup_host, iscsi_remove_host, NULL); static DECLARE_TRANSPORT_CLASS(iscsi_session_class, "iscsi_session", NULL, NULL, NULL); static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, "iscsi_connection", NULL, NULL, NULL); static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); static LIST_HEAD(connlist_err); static DEFINE_SPINLOCK(connlock); static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn) { struct iscsi_cls_session *sess = iscsi_dev_to_session(conn->dev.parent); return sess->sid; } /* * Returns the matching session to a given sid */ static struct iscsi_cls_session *iscsi_session_lookup(uint32_t sid) { unsigned long flags; struct iscsi_cls_session *sess; spin_lock_irqsave(&sesslock, flags); list_for_each_entry(sess, 
&sesslist, sess_list) { if (sess->sid == sid) { spin_unlock_irqrestore(&sesslock, flags); return sess; } } spin_unlock_irqrestore(&sesslock, flags); return NULL; } /* * Returns the matching connection to a given sid / cid tuple */ static struct iscsi_cls_conn *iscsi_conn_lookup(uint32_t sid, uint32_t cid) { unsigned long flags; struct iscsi_cls_conn *conn; spin_lock_irqsave(&connlock, flags); list_for_each_entry(conn, &connlist, conn_list) { if ((conn->cid == cid) && (iscsi_conn_get_sid(conn) == sid)) { spin_unlock_irqrestore(&connlock, flags); return conn; } } spin_unlock_irqrestore(&connlock, flags); return NULL; } /* * The following functions can be used by LLDs that allocate * their own scsi_hosts or by software iscsi LLDs */ static struct { int value; char *name; } iscsi_session_state_names[] = { { ISCSI_SESSION_LOGGED_IN, "LOGGED_IN" }, { ISCSI_SESSION_FAILED, "FAILED" }, { ISCSI_SESSION_FREE, "FREE" }, }; static const char *iscsi_session_state_name(int state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_session_state_names); i++) { if (iscsi_session_state_names[i].value == state) { name = iscsi_session_state_names[i].name; break; } } return name; } static char *iscsi_session_target_state_name[] = { [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", }; int iscsi_session_chkready(struct iscsi_cls_session *session) { int err; switch (session->state) { case ISCSI_SESSION_LOGGED_IN: err = 0; break; case ISCSI_SESSION_FAILED: err = DID_IMM_RETRY << 16; break; case ISCSI_SESSION_FREE: err = DID_TRANSPORT_FAILFAST << 16; break; default: err = DID_NO_CONNECT << 16; break; } return err; } EXPORT_SYMBOL_GPL(iscsi_session_chkready); int iscsi_is_session_online(struct iscsi_cls_session *session) { unsigned long flags; int ret = 0; spin_lock_irqsave(&session->lock, flags); if (session->state == ISCSI_SESSION_LOGGED_IN) ret = 1; spin_unlock_irqrestore(&session->lock, flags); return ret; } EXPORT_SYMBOL_GPL(iscsi_is_session_online); static void iscsi_session_release(struct device *dev) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev); struct Scsi_Host *shost; shost = iscsi_session_to_shost(session); scsi_host_put(shost); ISCSI_DBG_TRANS_SESSION(session, "Completing session release\n"); kfree(session); } int iscsi_is_session_dev(const struct device *dev) { return dev->release == iscsi_session_release; } EXPORT_SYMBOL_GPL(iscsi_is_session_dev); static int iscsi_iter_session_fn(struct device *dev, void *data) { void (* fn) (struct iscsi_cls_session *) = data; if (!iscsi_is_session_dev(dev)) return 0; fn(iscsi_dev_to_session(dev)); return 0; } void iscsi_host_for_each_session(struct Scsi_Host *shost, void (*fn)(struct iscsi_cls_session *)) { device_for_each_child(&shost->shost_gendev, fn, iscsi_iter_session_fn); } EXPORT_SYMBOL_GPL(iscsi_host_for_each_session); struct iscsi_scan_data { unsigned int channel; unsigned int id; u64 lun; enum scsi_scan_mode rescan; }; static int iscsi_user_scan_session(struct device *dev, void *data) { struct iscsi_scan_data *scan_data = data; struct iscsi_cls_session *session; struct Scsi_Host *shost; struct iscsi_cls_host *ihost; unsigned long flags; unsigned int id; if (!iscsi_is_session_dev(dev)) return 0; session = iscsi_dev_to_session(dev); ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n"); shost = iscsi_session_to_shost(session); ihost = shost->shost_data; mutex_lock(&ihost->mutex); 
spin_lock_irqsave(&session->lock, flags); if (session->state != ISCSI_SESSION_LOGGED_IN) { spin_unlock_irqrestore(&session->lock, flags); goto user_scan_exit; } id = session->target_id; spin_unlock_irqrestore(&session->lock, flags); if (id != ISCSI_MAX_TARGET) { if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_SCANNED; spin_unlock_irqrestore(&session->lock, flags); } } user_scan_exit: mutex_unlock(&ihost->mutex); ISCSI_DBG_TRANS_SESSION(session, "Completed session scan\n"); return 0; } static int iscsi_user_scan(struct Scsi_Host *shost, uint channel, uint id, u64 lun) { struct iscsi_scan_data scan_data; scan_data.channel = channel; scan_data.id = id; scan_data.lun = lun; scan_data.rescan = SCSI_SCAN_MANUAL; return device_for_each_child(&shost->shost_gendev, &scan_data, iscsi_user_scan_session); } static void iscsi_scan_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, scan_work); struct iscsi_scan_data scan_data; scan_data.channel = 0; scan_data.id = SCAN_WILD_CARD; scan_data.lun = SCAN_WILD_CARD; scan_data.rescan = SCSI_SCAN_RESCAN; iscsi_user_scan_session(&session->dev, &scan_data); } /** * iscsi_block_scsi_eh - block scsi eh until session state has transitioned * @cmd: scsi cmd passed to scsi eh handler * * If the session is down this function will wait for the recovery * timer to fire or for the session to be logged back in. If the * recovery timer fires then FAST_IO_FAIL is returned. The caller * should pass this error value to the scsi eh.
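 *
 * A minimal usage sketch from an LLD eh callback (the names
 * example_eh_device_reset and example_send_tmf are hypothetical and shown
 * only for illustration):
 *
 *	static int example_eh_device_reset(struct scsi_cmnd *sc)
 *	{
 *		int ret = iscsi_block_scsi_eh(sc);
 *
 *		if (ret)
 *			return ret;	// FAST_IO_FAIL, give up quickly
 *		return example_send_tmf(sc);	// session is logged in again
 *	}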
*/ int iscsi_block_scsi_eh(struct scsi_cmnd *cmd) { struct iscsi_cls_session *session = starget_to_session(scsi_target(cmd->device)); unsigned long flags; int ret = 0; spin_lock_irqsave(&session->lock, flags); while (session->state != ISCSI_SESSION_LOGGED_IN) { if (session->state == ISCSI_SESSION_FREE) { ret = FAST_IO_FAIL; break; } spin_unlock_irqrestore(&session->lock, flags); msleep(1000); spin_lock_irqsave(&session->lock, flags); } spin_unlock_irqrestore(&session->lock, flags); return ret; } EXPORT_SYMBOL_GPL(iscsi_block_scsi_eh); static void session_recovery_timedout(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, recovery_work.work); unsigned long flags; iscsi_cls_session_printk(KERN_INFO, session, "session recovery timed out after %d secs\n", session->recovery_tmo); spin_lock_irqsave(&session->lock, flags); switch (session->state) { case ISCSI_SESSION_FAILED: session->state = ISCSI_SESSION_FREE; break; case ISCSI_SESSION_LOGGED_IN: case ISCSI_SESSION_FREE: /* we raced with the unblock's flush */ spin_unlock_irqrestore(&session->lock, flags); return; } spin_unlock_irqrestore(&session->lock, flags); ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n"); scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n"); if (session->transport->session_recovery_timedout) session->transport->session_recovery_timedout(session); } static void __iscsi_unblock_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, unblock_work); unsigned long flags; ISCSI_DBG_TRANS_SESSION(session, "Unblocking session\n"); cancel_delayed_work_sync(&session->recovery_work); spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_LOGGED_IN; spin_unlock_irqrestore(&session->lock, flags); /* start IO */ scsi_target_unblock(&session->dev, SDEV_RUNNING); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking session\n"); } /** * iscsi_unblock_session - set a session as logged in and start IO. * @session: iscsi session * * Mark a session as ready to accept IO. */ void iscsi_unblock_session(struct iscsi_cls_session *session) { if (!cancel_work_sync(&session->block_work)) cancel_delayed_work_sync(&session->recovery_work); queue_work(session->workq, &session->unblock_work); /* * Blocking the session can be done from any context so we only * queue the block work. Make sure the unblock work has completed * because it flushes/cancels the other works and updates the state. 
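	 *
	 * The expected pairing from an LLD recovery path is roughly (a
	 * sketch; example_relogin is a hypothetical name, not part of this
	 * API):
	 *
	 *	iscsi_block_session(cls_session);	// conn failed, stop new IO
	 *	example_relogin(cls_session);		// re-establish the connection
	 *	iscsi_unblock_session(cls_session);	// logged in again, resume IO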
*/ flush_work(&session->unblock_work); } EXPORT_SYMBOL_GPL(iscsi_unblock_session); static void __iscsi_block_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, block_work); struct Scsi_Host *shost = iscsi_session_to_shost(session); unsigned long flags; ISCSI_DBG_TRANS_SESSION(session, "Blocking session\n"); spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_FAILED; spin_unlock_irqrestore(&session->lock, flags); scsi_block_targets(shost, &session->dev); ISCSI_DBG_TRANS_SESSION(session, "Completed SCSI target blocking\n"); if (session->recovery_tmo >= 0) queue_delayed_work(session->workq, &session->recovery_work, session->recovery_tmo * HZ); } void iscsi_block_session(struct iscsi_cls_session *session) { queue_work(session->workq, &session->block_work); } EXPORT_SYMBOL_GPL(iscsi_block_session); static void __iscsi_unbind_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, unbind_work); struct Scsi_Host *shost = iscsi_session_to_shost(session); struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { remove_target = false; } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); ISCSI_DBG_TRANS_SESSION(session, "Skipping target unbinding: Session is unbound/unbinding.\n"); return; } session->target_state = ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); if (remove_target) scsi_remove_target(&session->dev); if (session->ida_used) ida_free(&iscsi_sess_ida, target_id); iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_UNBOUND; spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, destroy_work); session->transport->destroy_session(session); } struct iscsi_cls_session * iscsi_alloc_session(struct Scsi_Host *shost, struct iscsi_transport *transport, int dd_size) { struct iscsi_cls_session *session; session = kzalloc(sizeof(*session) + dd_size, GFP_KERNEL); if (!session) return NULL; session->transport = transport; session->creator = -1; session->recovery_tmo = 120; session->recovery_tmo_sysfs_override = false; session->state = ISCSI_SESSION_FREE; INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout); INIT_LIST_HEAD(&session->sess_list); INIT_WORK(&session->unblock_work, __iscsi_unblock_session); INIT_WORK(&session->block_work, __iscsi_block_session); INIT_WORK(&session->unbind_work, __iscsi_unbind_session); INIT_WORK(&session->scan_work, iscsi_scan_session); INIT_WORK(&session->destroy_work, __iscsi_destroy_session); spin_lock_init(&session->lock); /* this is released in the dev's release function */ scsi_host_get(shost); session->dev.parent = &shost->shost_gendev; session->dev.release = iscsi_session_release; 
device_initialize(&session->dev); if (dd_size) session->dd_data = &session[1]; ISCSI_DBG_TRANS_SESSION(session, "Completed session allocation\n"); return session; } EXPORT_SYMBOL_GPL(iscsi_alloc_session); int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) { struct Scsi_Host *shost = iscsi_session_to_shost(session); unsigned long flags; int id = 0; int err; session->sid = atomic_add_return(1, &iscsi_session_nr); session->workq = alloc_workqueue("iscsi_ctrl_%d:%d", WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0, shost->host_no, session->sid); if (!session->workq) return -ENOMEM; if (target_id == ISCSI_MAX_TARGET) { id = ida_alloc(&iscsi_sess_ida, GFP_KERNEL); if (id < 0) { iscsi_cls_session_printk(KERN_ERR, session, "Failure in Target ID Allocation\n"); err = id; goto destroy_wq; } session->target_id = (unsigned int)id; session->ida_used = true; } else session->target_id = target_id; spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register session's dev\n"); goto release_ida; } err = transport_register_device(&session->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register transport's dev\n"); goto release_dev; } spin_lock_irqsave(&sesslock, flags); list_add(&session->sess_list, &sesslist); spin_unlock_irqrestore(&sesslock, flags); iscsi_session_event(session, ISCSI_KEVENT_CREATE_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed session adding\n"); return 0; release_dev: device_del(&session->dev); release_ida: if (session->ida_used) ida_free(&iscsi_sess_ida, session->target_id); destroy_wq: destroy_workqueue(session->workq); return err; } EXPORT_SYMBOL_GPL(iscsi_add_session); /** * iscsi_create_session - create iscsi class session * @shost: scsi host * @transport: iscsi transport * @dd_size: private driver data size * @target_id: which target * * This can be called from a LLD or iscsi_transport. */ struct iscsi_cls_session * iscsi_create_session(struct Scsi_Host *shost, struct iscsi_transport *transport, int dd_size, unsigned int target_id) { struct iscsi_cls_session *session; session = iscsi_alloc_session(shost, transport, dd_size); if (!session) return NULL; if (iscsi_add_session(session, target_id)) { iscsi_free_session(session); return NULL; } return session; } EXPORT_SYMBOL_GPL(iscsi_create_session); static void iscsi_conn_release(struct device *dev) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev); struct device *parent = conn->dev.parent; ISCSI_DBG_TRANS_CONN(conn, "Releasing conn\n"); kfree(conn); put_device(parent); } static int iscsi_is_conn_dev(const struct device *dev) { return dev->release == iscsi_conn_release; } static int iscsi_iter_destroy_conn_fn(struct device *dev, void *data) { if (!iscsi_is_conn_dev(dev)) return 0; iscsi_remove_conn(iscsi_dev_to_conn(dev)); return 0; } void iscsi_remove_session(struct iscsi_cls_session *session) { unsigned long flags; int err; ISCSI_DBG_TRANS_SESSION(session, "Removing session\n"); spin_lock_irqsave(&sesslock, flags); if (!list_empty(&session->sess_list)) list_del(&session->sess_list); spin_unlock_irqrestore(&sesslock, flags); if (!cancel_work_sync(&session->block_work)) cancel_delayed_work_sync(&session->recovery_work); cancel_work_sync(&session->unblock_work); /* * If we are blocked let commands flow again. 
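	 * (scsi_target_unblock() below moves the devices to
	 * SDEV_TRANSPORT_OFFLINE so outstanding commands are failed instead
	 * of staying blocked.)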
The lld or iscsi * layer should set up the queuecommand to fail commands. * We assume that LLD will not be calling block/unblock while * removing the session. */ spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_FREE; spin_unlock_irqrestore(&session->lock, flags); scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); /* * qla4xxx can perform it's own scans when it runs in kernel only * mode. Make sure to flush those scans. */ flush_work(&session->scan_work); /* flush running unbind operations */ flush_work(&session->unbind_work); __iscsi_unbind_session(&session->unbind_work); /* hw iscsi may not have removed all connections from session */ err = device_for_each_child(&session->dev, NULL, iscsi_iter_destroy_conn_fn); if (err) iscsi_cls_session_printk(KERN_ERR, session, "Could not delete all connections " "for session. Error %d.\n", err); transport_unregister_device(&session->dev); destroy_workqueue(session->workq); ISCSI_DBG_TRANS_SESSION(session, "Completing session removal\n"); device_del(&session->dev); } EXPORT_SYMBOL_GPL(iscsi_remove_session); static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) { ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n"); switch (flag) { case STOP_CONN_RECOVER: WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", flag); return; } conn->transport->stop_conn(conn, flag); ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); } static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) { struct iscsi_cls_session *session = iscsi_conn_to_session(conn); struct iscsi_endpoint *ep; ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); if (!conn->ep || !session->transport->ep_disconnect) return; ep = conn->ep; conn->ep = NULL; session->transport->unbind_conn(conn, is_active); session->transport->ep_disconnect(ep); ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); } static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, struct iscsi_endpoint *ep, bool is_active) { /* Check if this was a conn error and the kernel took ownership */ spin_lock_irq(&conn->lock); if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { spin_unlock_irq(&conn->lock); iscsi_ep_disconnect(conn, is_active); } else { spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); mutex_unlock(&conn->ep_mutex); flush_work(&conn->cleanup_work); /* * Userspace is now done with the EP so we can release the ref * iscsi_cleanup_conn_work_fn took. */ iscsi_put_endpoint(ep); mutex_lock(&conn->ep_mutex); } } static int iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) { ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); /* * For offload, iscsid may not know about the ep like when iscsid is * restarted or for kernel based session shutdown iscsid is not even * up. For these cases, we do the disconnect now. */ mutex_lock(&conn->ep_mutex); if (conn->ep) iscsi_if_disconnect_bound_ep(conn, conn->ep, true); mutex_unlock(&conn->ep_mutex); /* * If this is a termination we have to call stop_conn with that flag * so the correct states get set. If we haven't run the work yet try to * avoid the extra run. */ if (flag == STOP_CONN_TERM) { cancel_work_sync(&conn->cleanup_work); iscsi_stop_conn(conn, flag); } else { /* * Figure out if it was the kernel or userspace initiating this. 
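		 * If we win the test_and_set of ISCSI_CLS_CONN_BIT_CLEANUP,
		 * userspace got here first and we run iscsi_stop_conn()
		 * ourselves; if the bit was already set, the kernel's
		 * cleanup_work owns the teardown and we only flush it.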
*/ spin_lock_irq(&conn->lock); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { spin_unlock_irq(&conn->lock); iscsi_stop_conn(conn, flag); } else { spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); } /* * Only clear for recovery to avoid extra cleanup runs during * termination. */ spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); spin_unlock_irq(&conn->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; } static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, cleanup_work); struct iscsi_cls_session *session = iscsi_conn_to_session(conn); mutex_lock(&conn->ep_mutex); /* * Get a ref to the ep, so we don't release its ID until after * userspace is done referencing it in iscsi_if_disconnect_bound_ep. */ if (conn->ep) get_device(&conn->ep->dev); iscsi_ep_disconnect(conn, false); if (system_state != SYSTEM_RUNNING) { /* * If the user has set up for the session to never timeout * then hang like they wanted. For all other cases fail right * away since userspace is not going to relogin. */ if (session->recovery_tmo > 0) session->recovery_tmo = 0; } iscsi_stop_conn(conn, STOP_CONN_RECOVER); mutex_unlock(&conn->ep_mutex); ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); } static int iscsi_iter_force_destroy_conn_fn(struct device *dev, void *data) { struct iscsi_transport *transport; struct iscsi_cls_conn *conn; if (!iscsi_is_conn_dev(dev)) return 0; conn = iscsi_dev_to_conn(dev); transport = conn->transport; if (READ_ONCE(conn->state) != ISCSI_CONN_DOWN) iscsi_if_stop_conn(conn, STOP_CONN_TERM); transport->destroy_conn(conn); return 0; } /** * iscsi_force_destroy_session - destroy a session from the kernel * @session: session to destroy * * Force the destruction of a session from the kernel. This should only be * used when userspace is no longer running during system shutdown. */ void iscsi_force_destroy_session(struct iscsi_cls_session *session) { struct iscsi_transport *transport = session->transport; unsigned long flags; WARN_ON_ONCE(system_state == SYSTEM_RUNNING); spin_lock_irqsave(&sesslock, flags); if (list_empty(&session->sess_list)) { spin_unlock_irqrestore(&sesslock, flags); /* * Conn/ep is already freed. Session is being torn down via * async path. For shutdown we don't care about it so return. 
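		 * Otherwise every remaining child connection is stopped
		 * (STOP_CONN_TERM, unless it is already down) and destroyed
		 * via iscsi_iter_force_destroy_conn_fn(), after which the
		 * transport's destroy_session() frees the session.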
*/ return; } spin_unlock_irqrestore(&sesslock, flags); device_for_each_child(&session->dev, NULL, iscsi_iter_force_destroy_conn_fn); transport->destroy_session(session); } EXPORT_SYMBOL_GPL(iscsi_force_destroy_session); void iscsi_free_session(struct iscsi_cls_session *session) { ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); iscsi_session_event(session, ISCSI_KEVENT_DESTROY_SESSION); put_device(&session->dev); } EXPORT_SYMBOL_GPL(iscsi_free_session); /** * iscsi_alloc_conn - alloc iscsi class connection * @session: iscsi cls session * @dd_size: private driver data size * @cid: connection id */ struct iscsi_cls_conn * iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) { struct iscsi_transport *transport = session->transport; struct iscsi_cls_conn *conn; conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL); if (!conn) return NULL; if (dd_size) conn->dd_data = &conn[1]; mutex_init(&conn->ep_mutex); spin_lock_init(&conn->lock); INIT_LIST_HEAD(&conn->conn_list); INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); /* this is released in the dev's release function */ if (!get_device(&session->dev)) goto free_conn; dev_set_name(&conn->dev, "connection%d:%u", session->sid, cid); device_initialize(&conn->dev); conn->dev.parent = &session->dev; conn->dev.release = iscsi_conn_release; return conn; free_conn: kfree(conn); return NULL; } EXPORT_SYMBOL_GPL(iscsi_alloc_conn); /** * iscsi_add_conn - add iscsi class connection * @conn: iscsi cls connection * * This will expose iscsi_cls_conn to sysfs so make sure the related * resources for sysfs attributes are initialized before calling this. */ int iscsi_add_conn(struct iscsi_cls_conn *conn) { int err; unsigned long flags; struct iscsi_cls_session *session = iscsi_dev_to_session(conn->dev.parent); err = device_add(&conn->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register connection's dev\n"); return err; } err = transport_register_device(&conn->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register transport's dev\n"); device_del(&conn->dev); return err; } spin_lock_irqsave(&connlock, flags); list_add(&conn->conn_list, &connlist); spin_unlock_irqrestore(&connlock, flags); return 0; } EXPORT_SYMBOL_GPL(iscsi_add_conn); /** * iscsi_remove_conn - remove iscsi class connection from sysfs * @conn: iscsi cls connection * * Remove iscsi_cls_conn from sysfs, and wait for previous * read/write of iscsi_cls_conn's attributes in sysfs to finish. 
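 *
 * Typical connection lifecycle from an LLD's point of view (a rough
 * sketch, error handling omitted):
 *
 *	conn = iscsi_alloc_conn(session, dd_size, cid);
 *	err = iscsi_add_conn(conn);	// exposes connection%d:%u in sysfs
 *	...				// use the connection
 *	iscsi_remove_conn(conn);	// hide it from sysfs again
 *	iscsi_put_conn(conn);		// drop the final device reference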
*/ void iscsi_remove_conn(struct iscsi_cls_conn *conn) { unsigned long flags; spin_lock_irqsave(&connlock, flags); list_del(&conn->conn_list); spin_unlock_irqrestore(&connlock, flags); transport_unregister_device(&conn->dev); device_del(&conn->dev); } EXPORT_SYMBOL_GPL(iscsi_remove_conn); void iscsi_put_conn(struct iscsi_cls_conn *conn) { put_device(&conn->dev); } EXPORT_SYMBOL_GPL(iscsi_put_conn); void iscsi_get_conn(struct iscsi_cls_conn *conn) { get_device(&conn->dev); } EXPORT_SYMBOL_GPL(iscsi_get_conn); /* * iscsi interface functions */ static struct iscsi_internal * iscsi_if_transport_lookup(struct iscsi_transport *tt) { struct iscsi_internal *priv; unsigned long flags; spin_lock_irqsave(&iscsi_transport_lock, flags); list_for_each_entry(priv, &iscsi_transports, list) { if (tt == priv->iscsi_transport) { spin_unlock_irqrestore(&iscsi_transport_lock, flags); return priv; } } spin_unlock_irqrestore(&iscsi_transport_lock, flags); return NULL; } static int iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp) { return nlmsg_multicast(nls, skb, 0, group, gfp); } static int iscsi_unicast_skb(struct sk_buff *skb, u32 portid) { return nlmsg_unicast(nls, skb, portid); } int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; char *pdu; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev) + sizeof(struct iscsi_hdr) + data_size); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return -EINVAL; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_conn_error_event(conn, ISCSI_ERR_CONN_FAILED); iscsi_cls_conn_printk(KERN_ERR, conn, "can not deliver " "control PDU: OOM\n"); return -ENOMEM; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); memset(ev, 0, sizeof(*ev)); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_RECV_PDU; ev->r.recv_req.cid = conn->cid; ev->r.recv_req.sid = iscsi_conn_get_sid(conn); pdu = (char*)ev + sizeof(*ev); memcpy(pdu, hdr, sizeof(struct iscsi_hdr)); memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size); return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iscsi_recv_pdu); int iscsi_offload_mesg(struct Scsi_Host *shost, struct iscsi_transport *transport, uint32_t type, char *data, uint16_t data_size) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); return -ENOMEM; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); memset(ev, 0, sizeof(*ev)); ev->type = type; ev->transport_handle = iscsi_handle(transport); switch (type) { case ISCSI_KEVENT_PATH_REQ: ev->r.req_path.host_no = shost->host_no; break; case ISCSI_KEVENT_IF_DOWN: ev->r.notify_if_down.host_no = shost->host_no; break; } memcpy((char *)ev + sizeof(*ev), data, data_size); return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iscsi_offload_mesg); void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); unsigned long flags; int state; spin_lock_irqsave(&conn->lock, flags); /* * Userspace will only do a stop call if we are at least bound. 
And, we * only need to do the in kernel cleanup if in the UP state so cmds can * be released to upper layers. If in other states just wait for * userspace to avoid races that can leave the cleanup_work queued. */ state = READ_ONCE(conn->state); switch (state) { case ISCSI_CONN_BOUND: case ISCSI_CONN_UP: if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); } break; default: ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", state); break; } spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored " "conn error (%d)\n", error); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; ev->r.connerror.error = error; ev->r.connerror.cid = conn->cid; ev->r.connerror.sid = iscsi_conn_get_sid(conn); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n", error); } EXPORT_SYMBOL_GPL(iscsi_conn_error_event); void iscsi_conn_login_event(struct iscsi_cls_conn *conn, enum iscsi_conn_state state) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored " "conn login (%d)\n", state); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_LOGIN_STATE; ev->r.conn_login.state = state; ev->r.conn_login.cid = conn->cid; ev->r.conn_login.sid = iscsi_conn_get_sid(conn); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn login (%d)\n", state); } EXPORT_SYMBOL_GPL(iscsi_conn_login_event); void iscsi_post_host_event(uint32_t host_no, struct iscsi_transport *transport, enum iscsi_host_event_code code, uint32_t data_size, uint8_t *data) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { printk(KERN_ERR "gracefully ignored host event (%d):%d OOM\n", host_no, code); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_HOST_EVENT; ev->r.host_event.host_no = host_no; ev->r.host_event.code = code; ev->r.host_event.data_size = data_size; if (data_size) memcpy((char *)ev + sizeof(*ev), data, data_size); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_NOIO); } EXPORT_SYMBOL_GPL(iscsi_post_host_event); void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_transport *transport, uint32_t status, uint32_t pid, uint32_t data_size, uint8_t *data) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { printk(KERN_ERR "gracefully ignored ping comp: OOM\n"); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_PING_COMP; 
ev->r.ping_comp.host_no = host_no; ev->r.ping_comp.status = status; ev->r.ping_comp.pid = pid; ev->r.ping_comp.data_size = data_size; memcpy((char *)ev + sizeof(*ev), data, data_size); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_NOIO); } EXPORT_SYMBOL_GPL(iscsi_ping_comp_event); static int iscsi_if_send_reply(u32 portid, int type, void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; int len = nlmsg_total_size(size); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { printk(KERN_ERR "Could not allocate skb to send reply.\n"); return -ENOMEM; } nlh = __nlmsg_put(skb, 0, 0, type, (len - sizeof(*nlh)), 0); memcpy(nlmsg_data(nlh), payload, size); return iscsi_unicast_skb(skb, portid); } static int iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_stats *stats; struct sk_buff *skbstat; struct iscsi_cls_conn *conn; struct nlmsghdr *nlhstat; struct iscsi_uevent *evstat; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev) + sizeof(struct iscsi_stats) + sizeof(struct iscsi_stats_custom) * ISCSI_STATS_CUSTOM_MAX); int err = 0; priv = iscsi_if_transport_lookup(transport); if (!priv) return -EINVAL; conn = iscsi_conn_lookup(ev->u.get_stats.sid, ev->u.get_stats.cid); if (!conn) return -EEXIST; do { int actual_size; skbstat = alloc_skb(len, GFP_ATOMIC); if (!skbstat) { iscsi_cls_conn_printk(KERN_ERR, conn, "can not " "deliver stats: OOM\n"); return -ENOMEM; } nlhstat = __nlmsg_put(skbstat, 0, 0, 0, (len - sizeof(*nlhstat)), 0); evstat = nlmsg_data(nlhstat); memset(evstat, 0, sizeof(*evstat)); evstat->transport_handle = iscsi_handle(conn->transport); evstat->type = nlh->nlmsg_type; evstat->u.get_stats.cid = ev->u.get_stats.cid; evstat->u.get_stats.sid = ev->u.get_stats.sid; stats = (struct iscsi_stats *) ((char*)evstat + sizeof(*evstat)); memset(stats, 0, sizeof(*stats)); transport->get_stats(conn, stats); actual_size = nlmsg_total_size(sizeof(struct iscsi_uevent) + sizeof(struct iscsi_stats) + sizeof(struct iscsi_stats_custom) * stats->custom_length); actual_size -= sizeof(*nlhstat); actual_size = nlmsg_msg_size(actual_size); skb_trim(skbstat, NLMSG_ALIGN(actual_size)); nlhstat->nlmsg_len = actual_size; err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); } while (err < 0 && err != -ECONNREFUSED); return err; } /** * iscsi_session_event - send session destr. 
completion event * @session: iscsi class session * @event: type of event */ int iscsi_session_event(struct iscsi_cls_session *session, enum iscsi_uevent_e event) { struct iscsi_internal *priv; struct Scsi_Host *shost; struct iscsi_uevent *ev; struct sk_buff *skb; struct nlmsghdr *nlh; int rc, len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(session->transport); if (!priv) return -EINVAL; shost = iscsi_session_to_shost(session); skb = alloc_skb(len, GFP_KERNEL); if (!skb) { iscsi_cls_session_printk(KERN_ERR, session, "Cannot notify userspace of session " "event %u\n", event); return -ENOMEM; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(session->transport); ev->type = event; switch (event) { case ISCSI_KEVENT_DESTROY_SESSION: ev->r.d_session.host_no = shost->host_no; ev->r.d_session.sid = session->sid; break; case ISCSI_KEVENT_CREATE_SESSION: ev->r.c_session_ret.host_no = shost->host_no; ev->r.c_session_ret.sid = session->sid; break; case ISCSI_KEVENT_UNBIND_SESSION: ev->r.unbind_session.host_no = shost->host_no; ev->r.unbind_session.sid = session->sid; break; default: iscsi_cls_session_printk(KERN_ERR, session, "Invalid event " "%u.\n", event); kfree_skb(skb); return -EINVAL; } /* * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL); if (rc == -ESRCH) iscsi_cls_session_printk(KERN_ERR, session, "Cannot notify userspace of session " "event %u. Check iscsi daemon\n", event); ISCSI_DBG_TRANS_SESSION(session, "Completed handling event %d rc %d\n", event, rc); return rc; } EXPORT_SYMBOL_GPL(iscsi_session_event); static int iscsi_if_create_session(struct iscsi_internal *priv, struct iscsi_endpoint *ep, struct iscsi_uevent *ev, pid_t pid, uint32_t initial_cmdsn, uint16_t cmds_max, uint16_t queue_depth) { struct iscsi_transport *transport = priv->iscsi_transport; struct iscsi_cls_session *session; struct Scsi_Host *shost; session = transport->create_session(ep, cmds_max, queue_depth, initial_cmdsn); if (!session) return -ENOMEM; session->creator = pid; shost = iscsi_session_to_shost(session); ev->r.c_session_ret.host_no = shost->host_no; ev->r.c_session_ret.sid = session->sid; ISCSI_DBG_TRANS_SESSION(session, "Completed creating transport session\n"); return 0; } static int iscsi_if_create_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; session = iscsi_session_lookup(ev->u.c_conn.sid); if (!session) { printk(KERN_ERR "iscsi: invalid session %d.\n", ev->u.c_conn.sid); return -EINVAL; } conn = transport->create_conn(session, ev->u.c_conn.cid); if (!conn) { iscsi_cls_session_printk(KERN_ERR, session, "couldn't create a new connection."); return -ENOMEM; } ev->r.c_conn_ret.sid = session->sid; ev->r.c_conn_ret.cid = conn->cid; ISCSI_DBG_TRANS_CONN(conn, "Completed creating transport conn\n"); return 0; } static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL; ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n"); flush_work(&conn->cleanup_work); ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); if (transport->destroy_conn) transport->destroy_conn(conn); return 0; } static int iscsi_if_set_param(struct iscsi_transport 
*transport, struct iscsi_uevent *ev, u32 rlen) { char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; int err = 0, value = 0, state; if (ev->u.set_param.len > rlen || ev->u.set_param.len > PAGE_SIZE) return -EINVAL; session = iscsi_session_lookup(ev->u.set_param.sid); conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid); if (!conn || !session) return -EINVAL; /* data will be regarded as NULL-ended string, do length check */ if (strlen(data) > ev->u.set_param.len) return -EINVAL; switch (ev->u.set_param.param) { case ISCSI_PARAM_SESS_RECOVERY_TMO: sscanf(data, "%d", &value); if (!session->recovery_tmo_sysfs_override) session->recovery_tmo = value; break; default: state = READ_ONCE(conn->state); if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { return -ENOTCONN; } } return err; } static int iscsi_if_ep_connect(struct iscsi_transport *transport, struct iscsi_uevent *ev, int msg_type) { struct iscsi_endpoint *ep; struct sockaddr *dst_addr; struct Scsi_Host *shost = NULL; int non_blocking, err = 0; if (!transport->ep_connect) return -EINVAL; if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) { shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no); if (!shost) { printk(KERN_ERR "ep connect failed. Could not find " "host no %u\n", ev->u.ep_connect_through_host.host_no); return -ENODEV; } non_blocking = ev->u.ep_connect_through_host.non_blocking; } else non_blocking = ev->u.ep_connect.non_blocking; dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); ep = transport->ep_connect(shost, dst_addr, non_blocking); if (IS_ERR(ep)) { err = PTR_ERR(ep); goto release_host; } ev->r.ep_connect_ret.handle = ep->id; release_host: if (shost) scsi_host_put(shost); return err; } static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, u64 ep_handle) { struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep; if (!transport->ep_disconnect) return -EINVAL; ep = iscsi_lookup_endpoint(ep_handle); if (!ep) return -EINVAL; conn = ep->conn; if (!conn) { /* * conn was not even bound yet, so we can't get iscsi conn * failures yet. 
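		 * It is safe to call ep_disconnect() directly here. For a
		 * bound ep the code below takes conn->ep_mutex and goes
		 * through iscsi_if_disconnect_bound_ep() so the disconnect
		 * cannot race with an in-kernel cleanup_work that may
		 * already own the teardown.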
*/ transport->ep_disconnect(ep); goto put_ep; } mutex_lock(&conn->ep_mutex); iscsi_if_disconnect_bound_ep(conn, ep, false); mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); return 0; } static int iscsi_if_transport_ep(struct iscsi_transport *transport, struct iscsi_uevent *ev, int msg_type, u32 rlen) { struct iscsi_endpoint *ep; int rc = 0; switch (msg_type) { case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: if (rlen < sizeof(struct sockaddr)) rc = -EINVAL; else rc = iscsi_if_ep_connect(transport, ev, msg_type); break; case ISCSI_UEVENT_TRANSPORT_EP_POLL: if (!transport->ep_poll) return -EINVAL; ep = iscsi_lookup_endpoint(ev->u.ep_poll.ep_handle); if (!ep) return -EINVAL; ev->r.retcode = transport->ep_poll(ep, ev->u.ep_poll.timeout_ms); iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: rc = iscsi_if_ep_disconnect(transport, ev->u.ep_disconnect.ep_handle); break; } return rc; } static int iscsi_tgt_dscvr(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen) { struct Scsi_Host *shost; struct sockaddr *dst_addr; int err; if (rlen < sizeof(*dst_addr)) return -EINVAL; if (!transport->tgt_dscvr) return -EINVAL; shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no); if (!shost) { printk(KERN_ERR "target discovery could not find host no %u\n", ev->u.tgt_dscvr.host_no); return -ENODEV; } dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type, ev->u.tgt_dscvr.enable, dst_addr); scsi_host_put(shost); return err; } static int iscsi_set_host_param(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen) { char *data = (char*)ev + sizeof(*ev); struct Scsi_Host *shost; int err; if (!transport->set_host_param) return -ENOSYS; if (ev->u.set_host_param.len > rlen || ev->u.set_host_param.len > PAGE_SIZE) return -EINVAL; shost = scsi_host_lookup(ev->u.set_host_param.host_no); if (!shost) { printk(KERN_ERR "set_host_param could not find host no %u\n", ev->u.set_host_param.host_no); return -ENODEV; } /* see similar check in iscsi_if_set_param() */ if (strlen(data) > ev->u.set_host_param.len) return -EINVAL; err = transport->set_host_param(shost, ev->u.set_host_param.param, data, ev->u.set_host_param.len); scsi_host_put(shost); return err; } static int iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen) { struct Scsi_Host *shost; struct iscsi_path *params; int err; if (rlen < sizeof(*params)) return -EINVAL; if (!transport->set_path) return -ENOSYS; shost = scsi_host_lookup(ev->u.set_path.host_no); if (!shost) { printk(KERN_ERR "set path could not find host no %u\n", ev->u.set_path.host_no); return -ENODEV; } params = (struct iscsi_path *)((char *)ev + sizeof(*ev)); err = transport->set_path(shost, params); scsi_host_put(shost); return err; } static int iscsi_session_has_conns(int sid) { struct iscsi_cls_conn *conn; unsigned long flags; int found = 0; spin_lock_irqsave(&connlock, flags); list_for_each_entry(conn, &connlist, conn_list) { if (iscsi_conn_get_sid(conn) == sid) { found = 1; break; } } spin_unlock_irqrestore(&connlock, flags); return found; } static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; int err; if (!transport->set_iface_param) return -ENOSYS; shost = scsi_host_lookup(ev->u.set_iface_params.host_no); if (!shost) { printk(KERN_ERR "set_iface_params could not find host no %u\n", 
ev->u.set_iface_params.host_no); return -ENODEV; } err = transport->set_iface_param(shost, data, len); scsi_host_put(shost); return err; } static int iscsi_send_ping(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen) { struct Scsi_Host *shost; struct sockaddr *dst_addr; int err; if (rlen < sizeof(*dst_addr)) return -EINVAL; if (!transport->send_ping) return -ENOSYS; shost = scsi_host_lookup(ev->u.iscsi_ping.host_no); if (!shost) { printk(KERN_ERR "iscsi_ping could not find host no %u\n", ev->u.iscsi_ping.host_no); return -ENODEV; } dst_addr = (struct sockaddr *)((char *)ev + sizeof(*ev)); err = transport->send_ping(shost, ev->u.iscsi_ping.iface_num, ev->u.iscsi_ping.iface_type, ev->u.iscsi_ping.payload_size, ev->u.iscsi_ping.pid, dst_addr); scsi_host_put(shost); return err; } static int iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct Scsi_Host *shost = NULL; struct iscsi_chap_rec *chap_rec; struct iscsi_internal *priv; struct sk_buff *skbchap; struct nlmsghdr *nlhchap; struct iscsi_uevent *evchap; uint32_t chap_buf_size; int len, err = 0; char *buf; if (!transport->get_chap) return -EINVAL; priv = iscsi_if_transport_lookup(transport); if (!priv) return -EINVAL; chap_buf_size = (ev->u.get_chap.num_entries * sizeof(*chap_rec)); len = nlmsg_total_size(sizeof(*ev) + chap_buf_size); shost = scsi_host_lookup(ev->u.get_chap.host_no); if (!shost) { printk(KERN_ERR "%s: failed. Could not find host no %u\n", __func__, ev->u.get_chap.host_no); return -ENODEV; } do { int actual_size; skbchap = alloc_skb(len, GFP_KERNEL); if (!skbchap) { printk(KERN_ERR "can not deliver chap: OOM\n"); err = -ENOMEM; goto exit_get_chap; } nlhchap = __nlmsg_put(skbchap, 0, 0, 0, (len - sizeof(*nlhchap)), 0); evchap = nlmsg_data(nlhchap); memset(evchap, 0, sizeof(*evchap)); evchap->transport_handle = iscsi_handle(transport); evchap->type = nlh->nlmsg_type; evchap->u.get_chap.host_no = ev->u.get_chap.host_no; evchap->u.get_chap.chap_tbl_idx = ev->u.get_chap.chap_tbl_idx; evchap->u.get_chap.num_entries = ev->u.get_chap.num_entries; buf = (char *)evchap + sizeof(*evchap); memset(buf, 0, chap_buf_size); err = transport->get_chap(shost, ev->u.get_chap.chap_tbl_idx, &evchap->u.get_chap.num_entries, buf); actual_size = nlmsg_total_size(sizeof(*ev) + chap_buf_size); skb_trim(skbchap, NLMSG_ALIGN(actual_size)); nlhchap->nlmsg_len = actual_size; err = iscsi_multicast_skb(skbchap, ISCSI_NL_GRP_ISCSID, GFP_KERNEL); } while (err < 0 && err != -ECONNREFUSED); exit_get_chap: scsi_host_put(shost); return err; } static int iscsi_set_chap(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; int err = 0; if (!transport->set_chap) return -ENOSYS; shost = scsi_host_lookup(ev->u.set_path.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.set_path.host_no); return -ENODEV; } err = transport->set_chap(shost, data, len); scsi_host_put(shost); return err; } static int iscsi_delete_chap(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; int err = 0; if (!transport->delete_chap) return -ENOSYS; shost = scsi_host_lookup(ev->u.delete_chap.host_no); if (!shost) { printk(KERN_ERR "%s could not find host no %u\n", __func__, ev->u.delete_chap.host_no); return -ENODEV; } err = transport->delete_chap(shost, ev->u.delete_chap.chap_tbl_idx); scsi_host_put(shost); return err; } static const struct { enum 
iscsi_discovery_parent_type value; char *name; } iscsi_discovery_parent_names[] = { {ISCSI_DISC_PARENT_UNKNOWN, "Unknown" }, {ISCSI_DISC_PARENT_SENDTGT, "Sendtarget" }, {ISCSI_DISC_PARENT_ISNS, "isns" }, }; char *iscsi_get_discovery_parent_name(int parent_type) { int i; char *state = "Unknown!"; for (i = 0; i < ARRAY_SIZE(iscsi_discovery_parent_names); i++) { if (iscsi_discovery_parent_names[i].value & parent_type) { state = iscsi_discovery_parent_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_discovery_parent_name); static int iscsi_set_flashnode_param(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->set_flashnode_param) { err = -ENOSYS; goto exit_set_fnode; } shost = scsi_host_lookup(ev->u.set_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.set_flashnode.host_no); err = -ENODEV; goto exit_set_fnode; } idx = ev->u.set_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.set_flashnode.host_no); err = -ENODEV; goto put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->set_flashnode_param(fnode_sess, fnode_conn, data, len); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_set_fnode: return err; } static int iscsi_new_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; int index; int err = 0; if (!transport->new_flashnode) { err = -ENOSYS; goto exit_new_fnode; } shost = scsi_host_lookup(ev->u.new_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; goto put_host; } index = transport->new_flashnode(shost, data, len); if (index >= 0) ev->r.new_flashnode_ret.flashnode_idx = index; else err = -EIO; put_host: scsi_host_put(shost); exit_new_fnode: return err; } static int iscsi_del_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; uint32_t idx; int err = 0; if (!transport->del_flashnode) { err = -ENOSYS; goto exit_del_fnode; } shost = scsi_host_lookup(ev->u.del_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; goto put_host; } idx = ev->u.del_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.del_flashnode.host_no); err = -ENODEV; goto put_host; } err = transport->del_flashnode(fnode_sess); put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_del_fnode: return err; } static int iscsi_login_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->login_flashnode) { err = -ENOSYS; goto exit_login_fnode; } shost = scsi_host_lookup(ev->u.login_flashnode.host_no); if 
(!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; goto put_host; } idx = ev->u.login_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.login_flashnode.host_no); err = -ENODEV; goto put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->login_flashnode(fnode_sess, fnode_conn); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_login_fnode: return err; } static int iscsi_logout_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->logout_flashnode) { err = -ENOSYS; goto exit_logout_fnode; } shost = scsi_host_lookup(ev->u.logout_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; goto put_host; } idx = ev->u.logout_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.logout_flashnode.host_no); err = -ENODEV; goto put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->logout_flashnode(fnode_sess, fnode_conn); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_logout_fnode: return err; } static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_cls_session *session; int err = 0; if (!transport->logout_flashnode_sid) { err = -ENOSYS; goto exit_logout_sid; } shost = scsi_host_lookup(ev->u.logout_flashnode_sid.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; goto put_host; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); if (!session) { pr_err("%s could not find session id %u\n", __func__, ev->u.logout_flashnode_sid.sid); err = -EINVAL; goto put_host; } err = transport->logout_flashnode_sid(session); put_host: scsi_host_put(shost); exit_logout_sid: return err; } static int iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct Scsi_Host *shost = NULL; struct iscsi_internal *priv; struct sk_buff *skbhost_stats; struct nlmsghdr *nlhhost_stats; struct iscsi_uevent *evhost_stats; int host_stats_size = 0; int len, err = 0; char *buf; if (!transport->get_host_stats) return -ENOSYS; priv = iscsi_if_transport_lookup(transport); if (!priv) return -EINVAL; host_stats_size = sizeof(struct iscsi_offload_host_stats); len = nlmsg_total_size(sizeof(*ev) + host_stats_size); shost = scsi_host_lookup(ev->u.get_host_stats.host_no); if (!shost) { pr_err("%s: failed. 
Could not find host no %u\n", __func__, ev->u.get_host_stats.host_no); return -ENODEV; } do { int actual_size; skbhost_stats = alloc_skb(len, GFP_KERNEL); if (!skbhost_stats) { pr_err("cannot deliver host stats: OOM\n"); err = -ENOMEM; goto exit_host_stats; } nlhhost_stats = __nlmsg_put(skbhost_stats, 0, 0, 0, (len - sizeof(*nlhhost_stats)), 0); evhost_stats = nlmsg_data(nlhhost_stats); memset(evhost_stats, 0, sizeof(*evhost_stats)); evhost_stats->transport_handle = iscsi_handle(transport); evhost_stats->type = nlh->nlmsg_type; evhost_stats->u.get_host_stats.host_no = ev->u.get_host_stats.host_no; buf = (char *)evhost_stats + sizeof(*evhost_stats); memset(buf, 0, host_stats_size); err = transport->get_host_stats(shost, buf, host_stats_size); if (err) { kfree_skb(skbhost_stats); goto exit_host_stats; } actual_size = nlmsg_total_size(sizeof(*ev) + host_stats_size); skb_trim(skbhost_stats, NLMSG_ALIGN(actual_size)); nlhhost_stats->nlmsg_len = actual_size; err = iscsi_multicast_skb(skbhost_stats, ISCSI_NL_GRP_ISCSID, GFP_KERNEL); } while (err < 0 && err != -ECONNREFUSED); exit_host_stats: scsi_host_put(shost); return err; } static int iscsi_if_transport_conn(struct iscsi_transport *transport, struct nlmsghdr *nlh, u32 pdu_len) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_cls_session *session; struct iscsi_cls_conn *conn = NULL; struct iscsi_endpoint *ep; int err = 0; switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_CONN: return iscsi_if_create_conn(transport, ev); case ISCSI_UEVENT_DESTROY_CONN: return iscsi_if_destroy_conn(transport, ev); case ISCSI_UEVENT_STOP_CONN: conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); if (!conn) return -EINVAL; return iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); } /* * The following cmds need to be run under the ep_mutex so in kernel * conn cleanup (ep_disconnect + unbind and conn) is not done while * these are running. They also must not run if we have just run a conn * cleanup because they would set the state in a way that might allow * IO or send IO themselves. 
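	 * The code below therefore looks up the conn for START/BIND/SEND_PDU,
	 * takes conn->ep_mutex, and if ISCSI_CLS_CONN_BIT_CLEANUP is already
	 * set it hands a retcode of -ENOTCONN back to userspace instead of
	 * running the command.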
*/ switch (nlh->nlmsg_type) { case ISCSI_UEVENT_START_CONN: conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); break; case ISCSI_UEVENT_BIND_CONN: conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid); break; case ISCSI_UEVENT_SEND_PDU: conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); break; } if (!conn) return -EINVAL; mutex_lock(&conn->ep_mutex); spin_lock_irq(&conn->lock); if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } spin_unlock_irq(&conn->lock); switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL; break; } ev->r.retcode = transport->bind_conn(session, conn, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); if (ev->r.retcode || !transport->ep_connect) break; ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph); if (ep) { ep->conn = conn; conn->ep = ep; iscsi_put_endpoint(ep); } else { err = -ENOTCONN; iscsi_cls_conn_printk(KERN_ERR, conn, "Could not set ep conn binding\n"); } break; case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) WRITE_ONCE(conn->state, ISCSI_CONN_UP); break; case ISCSI_UEVENT_SEND_PDU: if ((ev->u.send_pdu.hdr_size > pdu_len) || (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { err = -EINVAL; break; } ev->r.retcode = transport->send_pdu(conn, (struct iscsi_hdr *)((char *)ev + sizeof(*ev)), (char *)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, ev->u.send_pdu.data_size); break; default: err = -ENOSYS; } mutex_unlock(&conn->ep_mutex); return err; } static int iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; u32 portid; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; struct iscsi_cls_session *session; struct iscsi_endpoint *ep = NULL; u32 rlen; if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE) *group = ISCSI_NL_GRP_UIP; else *group = ISCSI_NL_GRP_ISCSID; priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle)); if (!priv) return -EINVAL; transport = priv->iscsi_transport; if (!try_module_get(transport->owner)) return -EINVAL; portid = NETLINK_CB(skb).portid; /* * Even though the remaining payload may not be regarded as nlattr, * (like address or something else), calculate the remaining length * here to ease following length checks. 
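	 * rlen ends up as the number of payload bytes following the
	 * iscsi_uevent header; handlers validate user-supplied sizes against
	 * it, e.g. iscsi_if_set_param() rejects a request whose
	 * ev->u.set_param.len exceeds rlen before touching the trailing data.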
*/ rlen = nlmsg_attrlen(nlh, sizeof(*ev)); switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, portid, ev->u.c_session.initial_cmdsn, ev->u.c_session.cmds_max, ev->u.c_session.queue_depth); break; case ISCSI_UEVENT_CREATE_BOUND_SESSION: ep = iscsi_lookup_endpoint(ev->u.c_bound_session.ep_handle); if (!ep) { err = -EINVAL; break; } err = iscsi_if_create_session(priv, ep, ev, portid, ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); if (!session) err = -EINVAL; else if (iscsi_session_has_conns(ev->u.d_session.sid)) err = -EBUSY; else transport->destroy_session(session); break; case ISCSI_UEVENT_DESTROY_SESSION_ASYNC: session = iscsi_session_lookup(ev->u.d_session.sid); if (!session) err = -EINVAL; else if (iscsi_session_has_conns(ev->u.d_session.sid)) err = -EBUSY; else { unsigned long flags; /* Prevent this session from being found again */ spin_lock_irqsave(&sesslock, flags); list_del_init(&session->sess_list); spin_unlock_irqrestore(&sesslock, flags); queue_work(system_unbound_wq, &session->destroy_work); } break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); if (session) queue_work(session->workq, &session->unbind_work); else err = -EINVAL; break; case ISCSI_UEVENT_SET_PARAM: err = iscsi_if_set_param(transport, ev, rlen); break; case ISCSI_UEVENT_CREATE_CONN: case ISCSI_UEVENT_DESTROY_CONN: case ISCSI_UEVENT_STOP_CONN: case ISCSI_UEVENT_START_CONN: case ISCSI_UEVENT_BIND_CONN: case ISCSI_UEVENT_SEND_PDU: err = iscsi_if_transport_conn(transport, nlh, rlen); break; case ISCSI_UEVENT_GET_STATS: err = iscsi_if_get_stats(transport, nlh); break; case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: case ISCSI_UEVENT_TRANSPORT_EP_POLL: case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type, rlen); break; case ISCSI_UEVENT_TGT_DSCVR: err = iscsi_tgt_dscvr(transport, ev, rlen); break; case ISCSI_UEVENT_SET_HOST_PARAM: err = iscsi_set_host_param(transport, ev, rlen); break; case ISCSI_UEVENT_PATH_UPDATE: err = iscsi_set_path(transport, ev, rlen); break; case ISCSI_UEVENT_SET_IFACE_PARAMS: err = iscsi_set_iface_params(transport, ev, rlen); break; case ISCSI_UEVENT_PING: err = iscsi_send_ping(transport, ev, rlen); break; case ISCSI_UEVENT_GET_CHAP: err = iscsi_get_chap(transport, nlh); break; case ISCSI_UEVENT_DELETE_CHAP: err = iscsi_delete_chap(transport, ev); break; case ISCSI_UEVENT_SET_FLASHNODE_PARAMS: err = iscsi_set_flashnode_param(transport, ev, rlen); break; case ISCSI_UEVENT_NEW_FLASHNODE: err = iscsi_new_flashnode(transport, ev, rlen); break; case ISCSI_UEVENT_DEL_FLASHNODE: err = iscsi_del_flashnode(transport, ev); break; case ISCSI_UEVENT_LOGIN_FLASHNODE: err = iscsi_login_flashnode(transport, ev); break; case ISCSI_UEVENT_LOGOUT_FLASHNODE: err = iscsi_logout_flashnode(transport, ev); break; case ISCSI_UEVENT_LOGOUT_FLASHNODE_SID: err = iscsi_logout_flashnode_sid(transport, ev); break; case ISCSI_UEVENT_SET_CHAP: err = iscsi_set_chap(transport, ev, rlen); break; case ISCSI_UEVENT_GET_HOST_STATS: err = iscsi_get_host_stats(transport, nlh); break; default: err = -ENOSYS; break; } module_put(transport->owner); return err; } /* * Get message from skb. Each message is processed by iscsi_if_recv_msg. 
* Malformed skbs with wrong lengths or invalid creds are not processed. */ static void iscsi_if_rx(struct sk_buff *skb) { u32 portid = NETLINK_CB(skb).portid; mutex_lock(&rx_queue_mutex); while (skb->len >= NLMSG_HDRLEN) { int err; uint32_t rlen; struct nlmsghdr *nlh; struct iscsi_uevent *ev; uint32_t group; int retries = ISCSI_SEND_MAX_ALLOWED; nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || skb->len < nlh->nlmsg_len) { break; } ev = nlmsg_data(nlh); rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) rlen = skb->len; err = iscsi_if_recv_msg(skb, nlh, &group); if (err) { ev->type = ISCSI_KEVENT_IF_ERROR; ev->iferror = err; } do { /* * special case for GET_STATS: * on success - sending reply and stats from * inside of if_recv_msg(), * on error - fall through. */ if (ev->type == ISCSI_UEVENT_GET_STATS && !err) break; if (ev->type == ISCSI_UEVENT_GET_CHAP && !err) break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); if (err == -EAGAIN && --retries < 0) { printk(KERN_WARNING "Send reply failed, error %d\n", err); break; } } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); } mutex_unlock(&rx_queue_mutex); } #define ISCSI_CLASS_ATTR(_prefix,_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name,_mode,_show,_store) /* * iSCSI connection attrs */ #define iscsi_conn_attr_show(param) \ static ssize_t \ show_conn_param_##param(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); \ struct iscsi_transport *t = conn->transport; \ return t->get_conn_param(conn, param, buf); \ } #define iscsi_conn_attr(field, param) \ iscsi_conn_attr_show(param) \ static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, show_conn_param_##param, \ NULL); iscsi_conn_attr(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH); iscsi_conn_attr(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH); iscsi_conn_attr(header_digest, ISCSI_PARAM_HDRDGST_EN); iscsi_conn_attr(data_digest, ISCSI_PARAM_DATADGST_EN); iscsi_conn_attr(ifmarker, ISCSI_PARAM_IFMARKER_EN); iscsi_conn_attr(ofmarker, ISCSI_PARAM_OFMARKER_EN); iscsi_conn_attr(persistent_port, ISCSI_PARAM_PERSISTENT_PORT); iscsi_conn_attr(exp_statsn, ISCSI_PARAM_EXP_STATSN); iscsi_conn_attr(persistent_address, ISCSI_PARAM_PERSISTENT_ADDRESS); iscsi_conn_attr(ping_tmo, ISCSI_PARAM_PING_TMO); iscsi_conn_attr(recv_tmo, ISCSI_PARAM_RECV_TMO); iscsi_conn_attr(local_port, ISCSI_PARAM_LOCAL_PORT); iscsi_conn_attr(statsn, ISCSI_PARAM_STATSN); iscsi_conn_attr(keepalive_tmo, ISCSI_PARAM_KEEPALIVE_TMO); iscsi_conn_attr(max_segment_size, ISCSI_PARAM_MAX_SEGMENT_SIZE); iscsi_conn_attr(tcp_timestamp_stat, ISCSI_PARAM_TCP_TIMESTAMP_STAT); iscsi_conn_attr(tcp_wsf_disable, ISCSI_PARAM_TCP_WSF_DISABLE); iscsi_conn_attr(tcp_nagle_disable, ISCSI_PARAM_TCP_NAGLE_DISABLE); iscsi_conn_attr(tcp_timer_scale, ISCSI_PARAM_TCP_TIMER_SCALE); iscsi_conn_attr(tcp_timestamp_enable, ISCSI_PARAM_TCP_TIMESTAMP_EN); iscsi_conn_attr(fragment_disable, ISCSI_PARAM_IP_FRAGMENT_DISABLE); iscsi_conn_attr(ipv4_tos, ISCSI_PARAM_IPV4_TOS); iscsi_conn_attr(ipv6_traffic_class, ISCSI_PARAM_IPV6_TC); iscsi_conn_attr(ipv6_flow_label, ISCSI_PARAM_IPV6_FLOW_LABEL); iscsi_conn_attr(is_fw_assigned_ipv6, ISCSI_PARAM_IS_FW_ASSIGNED_IPV6); iscsi_conn_attr(tcp_xmit_wsf, ISCSI_PARAM_TCP_XMIT_WSF); iscsi_conn_attr(tcp_recv_wsf, ISCSI_PARAM_TCP_RECV_WSF); iscsi_conn_attr(local_ipaddr, ISCSI_PARAM_LOCAL_IPADDR); static const char *const connection_state_names[] 
= { [ISCSI_CONN_UP] = "up", [ISCSI_CONN_DOWN] = "down", [ISCSI_CONN_FAILED] = "failed", [ISCSI_CONN_BOUND] = "bound" }; static ssize_t show_conn_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; int conn_state = READ_ONCE(conn->state); if (conn_state >= 0 && conn_state < ARRAY_SIZE(connection_state_names)) state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state, NULL); #define iscsi_conn_ep_attr_show(param) \ static ssize_t show_conn_ep_param_##param(struct device *dev, \ struct device_attribute *attr,\ char *buf) \ { \ struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); \ struct iscsi_transport *t = conn->transport; \ struct iscsi_endpoint *ep; \ ssize_t rc; \ \ /* \ * Need to make sure ep_disconnect does not free the LLD's \ * interconnect resources while we are trying to read them. \ */ \ mutex_lock(&conn->ep_mutex); \ ep = conn->ep; \ if (!ep && t->ep_connect) { \ mutex_unlock(&conn->ep_mutex); \ return -ENOTCONN; \ } \ \ if (ep) \ rc = t->get_ep_param(ep, param, buf); \ else \ rc = t->get_conn_param(conn, param, buf); \ mutex_unlock(&conn->ep_mutex); \ return rc; \ } #define iscsi_conn_ep_attr(field, param) \ iscsi_conn_ep_attr_show(param) \ static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, \ show_conn_ep_param_##param, NULL); iscsi_conn_ep_attr(address, ISCSI_PARAM_CONN_ADDRESS); iscsi_conn_ep_attr(port, ISCSI_PARAM_CONN_PORT); static struct attribute *iscsi_conn_attrs[] = { &dev_attr_conn_max_recv_dlength.attr, &dev_attr_conn_max_xmit_dlength.attr, &dev_attr_conn_header_digest.attr, &dev_attr_conn_data_digest.attr, &dev_attr_conn_ifmarker.attr, &dev_attr_conn_ofmarker.attr, &dev_attr_conn_address.attr, &dev_attr_conn_port.attr, &dev_attr_conn_exp_statsn.attr, &dev_attr_conn_persistent_address.attr, &dev_attr_conn_persistent_port.attr, &dev_attr_conn_ping_tmo.attr, &dev_attr_conn_recv_tmo.attr, &dev_attr_conn_local_port.attr, &dev_attr_conn_statsn.attr, &dev_attr_conn_keepalive_tmo.attr, &dev_attr_conn_max_segment_size.attr, &dev_attr_conn_tcp_timestamp_stat.attr, &dev_attr_conn_tcp_wsf_disable.attr, &dev_attr_conn_tcp_nagle_disable.attr, &dev_attr_conn_tcp_timer_scale.attr, &dev_attr_conn_tcp_timestamp_enable.attr, &dev_attr_conn_fragment_disable.attr, &dev_attr_conn_ipv4_tos.attr, &dev_attr_conn_ipv6_traffic_class.attr, &dev_attr_conn_ipv6_flow_label.attr, &dev_attr_conn_is_fw_assigned_ipv6.attr, &dev_attr_conn_tcp_xmit_wsf.attr, &dev_attr_conn_tcp_recv_wsf.attr, &dev_attr_conn_local_ipaddr.attr, &dev_attr_conn_state.attr, NULL, }; static umode_t iscsi_conn_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct iscsi_cls_conn *conn = transport_class_to_conn(cdev); struct iscsi_transport *t = conn->transport; int param; if (attr == &dev_attr_conn_max_recv_dlength.attr) param = ISCSI_PARAM_MAX_RECV_DLENGTH; else if (attr == &dev_attr_conn_max_xmit_dlength.attr) param = ISCSI_PARAM_MAX_XMIT_DLENGTH; else if (attr == &dev_attr_conn_header_digest.attr) param = ISCSI_PARAM_HDRDGST_EN; else if (attr == &dev_attr_conn_data_digest.attr) param = ISCSI_PARAM_DATADGST_EN; else if (attr == &dev_attr_conn_ifmarker.attr) param = ISCSI_PARAM_IFMARKER_EN; else if (attr == &dev_attr_conn_ofmarker.attr) param = ISCSI_PARAM_OFMARKER_EN; else if (attr == &dev_attr_conn_address.attr) param = 
ISCSI_PARAM_CONN_ADDRESS; else if (attr == &dev_attr_conn_port.attr) param = ISCSI_PARAM_CONN_PORT; else if (attr == &dev_attr_conn_exp_statsn.attr) param = ISCSI_PARAM_EXP_STATSN; else if (attr == &dev_attr_conn_persistent_address.attr) param = ISCSI_PARAM_PERSISTENT_ADDRESS; else if (attr == &dev_attr_conn_persistent_port.attr) param = ISCSI_PARAM_PERSISTENT_PORT; else if (attr == &dev_attr_conn_ping_tmo.attr) param = ISCSI_PARAM_PING_TMO; else if (attr == &dev_attr_conn_recv_tmo.attr) param = ISCSI_PARAM_RECV_TMO; else if (attr == &dev_attr_conn_local_port.attr) param = ISCSI_PARAM_LOCAL_PORT; else if (attr == &dev_attr_conn_statsn.attr) param = ISCSI_PARAM_STATSN; else if (attr == &dev_attr_conn_keepalive_tmo.attr) param = ISCSI_PARAM_KEEPALIVE_TMO; else if (attr == &dev_attr_conn_max_segment_size.attr) param = ISCSI_PARAM_MAX_SEGMENT_SIZE; else if (attr == &dev_attr_conn_tcp_timestamp_stat.attr) param = ISCSI_PARAM_TCP_TIMESTAMP_STAT; else if (attr == &dev_attr_conn_tcp_wsf_disable.attr) param = ISCSI_PARAM_TCP_WSF_DISABLE; else if (attr == &dev_attr_conn_tcp_nagle_disable.attr) param = ISCSI_PARAM_TCP_NAGLE_DISABLE; else if (attr == &dev_attr_conn_tcp_timer_scale.attr) param = ISCSI_PARAM_TCP_TIMER_SCALE; else if (attr == &dev_attr_conn_tcp_timestamp_enable.attr) param = ISCSI_PARAM_TCP_TIMESTAMP_EN; else if (attr == &dev_attr_conn_fragment_disable.attr) param = ISCSI_PARAM_IP_FRAGMENT_DISABLE; else if (attr == &dev_attr_conn_ipv4_tos.attr) param = ISCSI_PARAM_IPV4_TOS; else if (attr == &dev_attr_conn_ipv6_traffic_class.attr) param = ISCSI_PARAM_IPV6_TC; else if (attr == &dev_attr_conn_ipv6_flow_label.attr) param = ISCSI_PARAM_IPV6_FLOW_LABEL; else if (attr == &dev_attr_conn_is_fw_assigned_ipv6.attr) param = ISCSI_PARAM_IS_FW_ASSIGNED_IPV6; else if (attr == &dev_attr_conn_tcp_xmit_wsf.attr) param = ISCSI_PARAM_TCP_XMIT_WSF; else if (attr == &dev_attr_conn_tcp_recv_wsf.attr) param = ISCSI_PARAM_TCP_RECV_WSF; else if (attr == &dev_attr_conn_local_ipaddr.attr) param = ISCSI_PARAM_LOCAL_IPADDR; else if (attr == &dev_attr_conn_state.attr) return S_IRUGO; else { WARN_ONCE(1, "Invalid conn attr"); return 0; } return t->attr_is_visible(ISCSI_PARAM, param); } static struct attribute_group iscsi_conn_group = { .attrs = iscsi_conn_attrs, .is_visible = iscsi_conn_attr_is_visible, }; /* * iSCSI session attrs */ #define iscsi_session_attr_show(param, perm) \ static ssize_t \ show_session_param_##param(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ struct iscsi_transport *t = session->transport; \ \ if (perm && !capable(CAP_SYS_ADMIN)) \ return -EACCES; \ return t->get_session_param(session, param, buf); \ } #define iscsi_session_attr(field, param, perm) \ iscsi_session_attr_show(param, perm) \ static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \ NULL); iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0); iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0); iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0); iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0); iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0); iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0); iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0); iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0); iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0); iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0); 
iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1); iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1); iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1); iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1); iscsi_session_attr(chap_out_idx, ISCSI_PARAM_CHAP_OUT_IDX, 1); iscsi_session_attr(chap_in_idx, ISCSI_PARAM_CHAP_IN_IDX, 1); iscsi_session_attr(fast_abort, ISCSI_PARAM_FAST_ABORT, 0); iscsi_session_attr(abort_tmo, ISCSI_PARAM_ABORT_TMO, 0); iscsi_session_attr(lu_reset_tmo, ISCSI_PARAM_LU_RESET_TMO, 0); iscsi_session_attr(tgt_reset_tmo, ISCSI_PARAM_TGT_RESET_TMO, 0); iscsi_session_attr(ifacename, ISCSI_PARAM_IFACE_NAME, 0); iscsi_session_attr(initiatorname, ISCSI_PARAM_INITIATOR_NAME, 0); iscsi_session_attr(targetalias, ISCSI_PARAM_TARGET_ALIAS, 0); iscsi_session_attr(boot_root, ISCSI_PARAM_BOOT_ROOT, 0); iscsi_session_attr(boot_nic, ISCSI_PARAM_BOOT_NIC, 0); iscsi_session_attr(boot_target, ISCSI_PARAM_BOOT_TARGET, 0); iscsi_session_attr(auto_snd_tgt_disable, ISCSI_PARAM_AUTO_SND_TGT_DISABLE, 0); iscsi_session_attr(discovery_session, ISCSI_PARAM_DISCOVERY_SESS, 0); iscsi_session_attr(portal_type, ISCSI_PARAM_PORTAL_TYPE, 0); iscsi_session_attr(chap_auth, ISCSI_PARAM_CHAP_AUTH_EN, 0); iscsi_session_attr(discovery_logout, ISCSI_PARAM_DISCOVERY_LOGOUT_EN, 0); iscsi_session_attr(bidi_chap, ISCSI_PARAM_BIDI_CHAP_EN, 0); iscsi_session_attr(discovery_auth_optional, ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL, 0); iscsi_session_attr(def_time2wait, ISCSI_PARAM_DEF_TIME2WAIT, 0); iscsi_session_attr(def_time2retain, ISCSI_PARAM_DEF_TIME2RETAIN, 0); iscsi_session_attr(isid, ISCSI_PARAM_ISID, 0); iscsi_session_attr(tsid, ISCSI_PARAM_TSID, 0); iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0); iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); static ssize_t show_priv_session_target_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%s\n", iscsi_session_target_state_name[session->target_state]); } static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, show_priv_session_target_state, NULL); static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state)); } static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state, NULL); static ssize_t show_priv_session_creator(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%d\n", session->creator); } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); static ssize_t show_priv_session_target_id(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%d\n", session->target_id); } static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ show_priv_session_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if (session->field == -1) \ return sysfs_emit(buf, "off\n"); \ 
return sysfs_emit(buf, format"\n", session->field); \ } #define iscsi_priv_session_attr_store(field) \ static ssize_t \ store_priv_session_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ char *cp; \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if ((session->state == ISCSI_SESSION_FREE) || \ (session->state == ISCSI_SESSION_FAILED)) \ return -EBUSY; \ if (strncmp(buf, "off", 3) == 0) { \ session->field = -1; \ session->field##_sysfs_override = true; \ } else { \ val = simple_strtoul(buf, &cp, 0); \ if (*cp != '\0' && *cp != '\n') \ return -EINVAL; \ session->field = val; \ session->field##_sysfs_override = true; \ } \ return count; \ } #define iscsi_priv_session_rw_attr(field, format) \ iscsi_priv_session_attr_show(field, format) \ iscsi_priv_session_attr_store(field) \ static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO | S_IWUSR, \ show_priv_session_##field, \ store_priv_session_##field) iscsi_priv_session_rw_attr(recovery_tmo, "%d"); static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_initial_r2t.attr, &dev_attr_sess_max_outstanding_r2t.attr, &dev_attr_sess_immediate_data.attr, &dev_attr_sess_first_burst_len.attr, &dev_attr_sess_max_burst_len.attr, &dev_attr_sess_data_pdu_in_order.attr, &dev_attr_sess_data_seq_in_order.attr, &dev_attr_sess_erl.attr, &dev_attr_sess_targetname.attr, &dev_attr_sess_tpgt.attr, &dev_attr_sess_password.attr, &dev_attr_sess_password_in.attr, &dev_attr_sess_username.attr, &dev_attr_sess_username_in.attr, &dev_attr_sess_fast_abort.attr, &dev_attr_sess_abort_tmo.attr, &dev_attr_sess_lu_reset_tmo.attr, &dev_attr_sess_tgt_reset_tmo.attr, &dev_attr_sess_ifacename.attr, &dev_attr_sess_initiatorname.attr, &dev_attr_sess_targetalias.attr, &dev_attr_sess_boot_root.attr, &dev_attr_sess_boot_nic.attr, &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, &dev_attr_priv_sess_target_state.attr, &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, &dev_attr_priv_sess_target_id.attr, &dev_attr_sess_auto_snd_tgt_disable.attr, &dev_attr_sess_discovery_session.attr, &dev_attr_sess_portal_type.attr, &dev_attr_sess_chap_auth.attr, &dev_attr_sess_discovery_logout.attr, &dev_attr_sess_bidi_chap.attr, &dev_attr_sess_discovery_auth_optional.attr, &dev_attr_sess_def_time2wait.attr, &dev_attr_sess_def_time2retain.attr, &dev_attr_sess_isid.attr, &dev_attr_sess_tsid.attr, &dev_attr_sess_def_taskmgmt_tmo.attr, &dev_attr_sess_discovery_parent_idx.attr, &dev_attr_sess_discovery_parent_type.attr, NULL, }; static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct iscsi_cls_session *session = transport_class_to_session(cdev); struct iscsi_transport *t = session->transport; int param; if (attr == &dev_attr_sess_initial_r2t.attr) param = ISCSI_PARAM_INITIAL_R2T_EN; else if (attr == &dev_attr_sess_max_outstanding_r2t.attr) param = ISCSI_PARAM_MAX_R2T; else if (attr == &dev_attr_sess_immediate_data.attr) param = ISCSI_PARAM_IMM_DATA_EN; else if (attr == &dev_attr_sess_first_burst_len.attr) param = ISCSI_PARAM_FIRST_BURST; else if (attr == &dev_attr_sess_max_burst_len.attr) param = ISCSI_PARAM_MAX_BURST; else if (attr == &dev_attr_sess_data_pdu_in_order.attr) param = ISCSI_PARAM_PDU_INORDER_EN; else if (attr == &dev_attr_sess_data_seq_in_order.attr) param = ISCSI_PARAM_DATASEQ_INORDER_EN; 
else if (attr == &dev_attr_sess_erl.attr) param = ISCSI_PARAM_ERL; else if (attr == &dev_attr_sess_targetname.attr) param = ISCSI_PARAM_TARGET_NAME; else if (attr == &dev_attr_sess_tpgt.attr) param = ISCSI_PARAM_TPGT; else if (attr == &dev_attr_sess_chap_in_idx.attr) param = ISCSI_PARAM_CHAP_IN_IDX; else if (attr == &dev_attr_sess_chap_out_idx.attr) param = ISCSI_PARAM_CHAP_OUT_IDX; else if (attr == &dev_attr_sess_password.attr) param = ISCSI_PARAM_USERNAME; else if (attr == &dev_attr_sess_password_in.attr) param = ISCSI_PARAM_USERNAME_IN; else if (attr == &dev_attr_sess_username.attr) param = ISCSI_PARAM_PASSWORD; else if (attr == &dev_attr_sess_username_in.attr) param = ISCSI_PARAM_PASSWORD_IN; else if (attr == &dev_attr_sess_fast_abort.attr) param = ISCSI_PARAM_FAST_ABORT; else if (attr == &dev_attr_sess_abort_tmo.attr) param = ISCSI_PARAM_ABORT_TMO; else if (attr == &dev_attr_sess_lu_reset_tmo.attr) param = ISCSI_PARAM_LU_RESET_TMO; else if (attr == &dev_attr_sess_tgt_reset_tmo.attr) param = ISCSI_PARAM_TGT_RESET_TMO; else if (attr == &dev_attr_sess_ifacename.attr) param = ISCSI_PARAM_IFACE_NAME; else if (attr == &dev_attr_sess_initiatorname.attr) param = ISCSI_PARAM_INITIATOR_NAME; else if (attr == &dev_attr_sess_targetalias.attr) param = ISCSI_PARAM_TARGET_ALIAS; else if (attr == &dev_attr_sess_boot_root.attr) param = ISCSI_PARAM_BOOT_ROOT; else if (attr == &dev_attr_sess_boot_nic.attr) param = ISCSI_PARAM_BOOT_NIC; else if (attr == &dev_attr_sess_boot_target.attr) param = ISCSI_PARAM_BOOT_TARGET; else if (attr == &dev_attr_sess_auto_snd_tgt_disable.attr) param = ISCSI_PARAM_AUTO_SND_TGT_DISABLE; else if (attr == &dev_attr_sess_discovery_session.attr) param = ISCSI_PARAM_DISCOVERY_SESS; else if (attr == &dev_attr_sess_portal_type.attr) param = ISCSI_PARAM_PORTAL_TYPE; else if (attr == &dev_attr_sess_chap_auth.attr) param = ISCSI_PARAM_CHAP_AUTH_EN; else if (attr == &dev_attr_sess_discovery_logout.attr) param = ISCSI_PARAM_DISCOVERY_LOGOUT_EN; else if (attr == &dev_attr_sess_bidi_chap.attr) param = ISCSI_PARAM_BIDI_CHAP_EN; else if (attr == &dev_attr_sess_discovery_auth_optional.attr) param = ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL; else if (attr == &dev_attr_sess_def_time2wait.attr) param = ISCSI_PARAM_DEF_TIME2WAIT; else if (attr == &dev_attr_sess_def_time2retain.attr) param = ISCSI_PARAM_DEF_TIME2RETAIN; else if (attr == &dev_attr_sess_isid.attr) param = ISCSI_PARAM_ISID; else if (attr == &dev_attr_sess_tsid.attr) param = ISCSI_PARAM_TSID; else if (attr == &dev_attr_sess_def_taskmgmt_tmo.attr) param = ISCSI_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_sess_discovery_parent_idx.attr) param = ISCSI_PARAM_DISCOVERY_PARENT_IDX; else if (attr == &dev_attr_sess_discovery_parent_type.attr) param = ISCSI_PARAM_DISCOVERY_PARENT_TYPE; else if (attr == &dev_attr_priv_sess_recovery_tmo.attr) return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_state.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; } return t->attr_is_visible(ISCSI_PARAM, param); } static struct attribute_group iscsi_session_group = { .attrs = iscsi_session_attrs, .is_visible = iscsi_session_attr_is_visible, }; /* * iSCSI host attrs */ #define iscsi_host_attr_show(param) \ static ssize_t \ show_host_param_##param(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ 
struct Scsi_Host *shost = transport_class_to_shost(dev); \ struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \ return priv->iscsi_transport->get_host_param(shost, param, buf); \ } #define iscsi_host_attr(field, param) \ iscsi_host_attr_show(param) \ static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param, \ NULL); iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME); iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS); iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS); iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME); iscsi_host_attr(port_state, ISCSI_HOST_PARAM_PORT_STATE); iscsi_host_attr(port_speed, ISCSI_HOST_PARAM_PORT_SPEED); static struct attribute *iscsi_host_attrs[] = { &dev_attr_host_netdev.attr, &dev_attr_host_hwaddress.attr, &dev_attr_host_ipaddress.attr, &dev_attr_host_initiatorname.attr, &dev_attr_host_port_state.attr, &dev_attr_host_port_speed.attr, NULL, }; static umode_t iscsi_host_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct Scsi_Host *shost = transport_class_to_shost(cdev); struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); int param; if (attr == &dev_attr_host_netdev.attr) param = ISCSI_HOST_PARAM_NETDEV_NAME; else if (attr == &dev_attr_host_hwaddress.attr) param = ISCSI_HOST_PARAM_HWADDRESS; else if (attr == &dev_attr_host_ipaddress.attr) param = ISCSI_HOST_PARAM_IPADDRESS; else if (attr == &dev_attr_host_initiatorname.attr) param = ISCSI_HOST_PARAM_INITIATOR_NAME; else if (attr == &dev_attr_host_port_state.attr) param = ISCSI_HOST_PARAM_PORT_STATE; else if (attr == &dev_attr_host_port_speed.attr) param = ISCSI_HOST_PARAM_PORT_SPEED; else { WARN_ONCE(1, "Invalid host attr"); return 0; } return priv->iscsi_transport->attr_is_visible(ISCSI_HOST_PARAM, param); } static struct attribute_group iscsi_host_group = { .attrs = iscsi_host_attrs, .is_visible = iscsi_host_attr_is_visible, }; /* convert iscsi_port_speed values to ascii string name */ static const struct { enum iscsi_port_speed value; char *name; } iscsi_port_speed_names[] = { {ISCSI_PORT_SPEED_UNKNOWN, "Unknown" }, {ISCSI_PORT_SPEED_10MBPS, "10 Mbps" }, {ISCSI_PORT_SPEED_100MBPS, "100 Mbps" }, {ISCSI_PORT_SPEED_1GBPS, "1 Gbps" }, {ISCSI_PORT_SPEED_10GBPS, "10 Gbps" }, {ISCSI_PORT_SPEED_25GBPS, "25 Gbps" }, {ISCSI_PORT_SPEED_40GBPS, "40 Gbps" }, }; char *iscsi_get_port_speed_name(struct Scsi_Host *shost) { int i; char *speed = "Unknown!"; struct iscsi_cls_host *ihost = shost->shost_data; uint32_t port_speed = ihost->port_speed; for (i = 0; i < ARRAY_SIZE(iscsi_port_speed_names); i++) { if (iscsi_port_speed_names[i].value & port_speed) { speed = iscsi_port_speed_names[i].name; break; } } return speed; } EXPORT_SYMBOL_GPL(iscsi_get_port_speed_name); /* convert iscsi_port_state values to ascii string name */ static const struct { enum iscsi_port_state value; char *name; } iscsi_port_state_names[] = { {ISCSI_PORT_STATE_DOWN, "LINK DOWN" }, {ISCSI_PORT_STATE_UP, "LINK UP" }, }; char *iscsi_get_port_state_name(struct Scsi_Host *shost) { int i; char *state = "Unknown!"; struct iscsi_cls_host *ihost = shost->shost_data; uint32_t port_state = ihost->port_state; for (i = 0; i < ARRAY_SIZE(iscsi_port_state_names); i++) { if (iscsi_port_state_names[i].value & port_state) { state = iscsi_port_state_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_port_state_name); static int iscsi_session_match(struct attribute_container *cont, struct device 
*dev) { struct iscsi_cls_session *session; struct Scsi_Host *shost; struct iscsi_internal *priv; if (!iscsi_is_session_dev(dev)) return 0; session = iscsi_dev_to_session(dev); shost = iscsi_session_to_shost(session); if (!shost->transportt) return 0; priv = to_iscsi_internal(shost->transportt); if (priv->session_cont.ac.class != &iscsi_session_class.class) return 0; return &priv->session_cont.ac == cont; } static int iscsi_conn_match(struct attribute_container *cont, struct device *dev) { struct iscsi_cls_session *session; struct iscsi_cls_conn *conn; struct Scsi_Host *shost; struct iscsi_internal *priv; if (!iscsi_is_conn_dev(dev)) return 0; conn = iscsi_dev_to_conn(dev); session = iscsi_dev_to_session(conn->dev.parent); shost = iscsi_session_to_shost(session); if (!shost->transportt) return 0; priv = to_iscsi_internal(shost->transportt); if (priv->conn_cont.ac.class != &iscsi_connection_class.class) return 0; return &priv->conn_cont.ac == cont; } static int iscsi_host_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; struct iscsi_internal *priv; if (!scsi_is_host_device(dev)) return 0; shost = dev_to_shost(dev); if (!shost->transportt || shost->transportt->host_attrs.ac.class != &iscsi_host_class.class) return 0; priv = to_iscsi_internal(shost->transportt); return &priv->t.host_attrs.ac == cont; } struct scsi_transport_template * iscsi_register_transport(struct iscsi_transport *tt) { struct iscsi_internal *priv; unsigned long flags; int err; BUG_ON(!tt); WARN_ON(tt->ep_disconnect && !tt->unbind_conn); priv = iscsi_if_transport_lookup(tt); if (priv) return NULL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return NULL; INIT_LIST_HEAD(&priv->list); priv->iscsi_transport = tt; priv->t.user_scan = iscsi_user_scan; priv->dev.class = &iscsi_transport_class; dev_set_name(&priv->dev, "%s", tt->name); err = device_register(&priv->dev); if (err) goto put_dev; err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group); if (err) goto unregister_dev; /* host parameters */ priv->t.host_attrs.ac.class = &iscsi_host_class.class; priv->t.host_attrs.ac.match = iscsi_host_match; priv->t.host_attrs.ac.grp = &iscsi_host_group; priv->t.host_size = sizeof(struct iscsi_cls_host); transport_container_register(&priv->t.host_attrs); /* connection parameters */ priv->conn_cont.ac.class = &iscsi_connection_class.class; priv->conn_cont.ac.match = iscsi_conn_match; priv->conn_cont.ac.grp = &iscsi_conn_group; transport_container_register(&priv->conn_cont); /* session parameters */ priv->session_cont.ac.class = &iscsi_session_class.class; priv->session_cont.ac.match = iscsi_session_match; priv->session_cont.ac.grp = &iscsi_session_group; transport_container_register(&priv->session_cont); spin_lock_irqsave(&iscsi_transport_lock, flags); list_add(&priv->list, &iscsi_transports); spin_unlock_irqrestore(&iscsi_transport_lock, flags); printk(KERN_NOTICE "iscsi: registered transport (%s)\n", tt->name); return &priv->t; unregister_dev: device_unregister(&priv->dev); return NULL; put_dev: put_device(&priv->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_register_transport); void iscsi_unregister_transport(struct iscsi_transport *tt) { struct iscsi_internal *priv; unsigned long flags; BUG_ON(!tt); mutex_lock(&rx_queue_mutex); priv = iscsi_if_transport_lookup(tt); BUG_ON (!priv); spin_lock_irqsave(&iscsi_transport_lock, flags); list_del(&priv->list); spin_unlock_irqrestore(&iscsi_transport_lock, flags); transport_container_unregister(&priv->conn_cont); 
transport_container_unregister(&priv->session_cont); transport_container_unregister(&priv->t.host_attrs); sysfs_remove_group(&priv->dev.kobj, &iscsi_transport_group); device_unregister(&priv->dev); mutex_unlock(&rx_queue_mutex); } EXPORT_SYMBOL_GPL(iscsi_unregister_transport); void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *), struct device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; trace(dev, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(iscsi_dbg_trace); static __init int iscsi_transport_init(void) { int err; struct netlink_kernel_cfg cfg = { .groups = 1, .input = iscsi_if_rx, }; printk(KERN_INFO "Loading iSCSI transport class v%s.\n", ISCSI_TRANSPORT_VERSION); atomic_set(&iscsi_session_nr, 0); err = class_register(&iscsi_transport_class); if (err) return err; err = class_register(&iscsi_endpoint_class); if (err) goto unregister_transport_class; err = class_register(&iscsi_iface_class); if (err) goto unregister_endpoint_class; err = transport_class_register(&iscsi_host_class); if (err) goto unregister_iface_class; err = transport_class_register(&iscsi_connection_class); if (err) goto unregister_host_class; err = transport_class_register(&iscsi_session_class); if (err) goto unregister_conn_class; err = bus_register(&iscsi_flashnode_bus); if (err) goto unregister_session_class; nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, &cfg); if (!nls) { err = -ENOBUFS; goto unregister_flashnode_bus; } iscsi_conn_cleanup_workq = alloc_workqueue("%s", WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0, "iscsi_conn_cleanup"); if (!iscsi_conn_cleanup_workq) { err = -ENOMEM; goto release_nls; } return 0; release_nls: netlink_kernel_release(nls); unregister_flashnode_bus: bus_unregister(&iscsi_flashnode_bus); unregister_session_class: transport_class_unregister(&iscsi_session_class); unregister_conn_class: transport_class_unregister(&iscsi_connection_class); unregister_host_class: transport_class_unregister(&iscsi_host_class); unregister_iface_class: class_unregister(&iscsi_iface_class); unregister_endpoint_class: class_unregister(&iscsi_endpoint_class); unregister_transport_class: class_unregister(&iscsi_transport_class); return err; } static void __exit iscsi_transport_exit(void) { destroy_workqueue(iscsi_conn_cleanup_workq); netlink_kernel_release(nls); bus_unregister(&iscsi_flashnode_bus); transport_class_unregister(&iscsi_connection_class); transport_class_unregister(&iscsi_session_class); transport_class_unregister(&iscsi_host_class); class_unregister(&iscsi_endpoint_class); class_unregister(&iscsi_iface_class); class_unregister(&iscsi_transport_class); } module_init(iscsi_transport_init); module_exit(iscsi_transport_exit); MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, " "Dmitry Yusupov <dmitry_yus@yahoo.com>, " "Alex Aizman <itn780@yahoo.com>"); MODULE_DESCRIPTION("iSCSI Transport Interface"); MODULE_LICENSE("GPL"); MODULE_VERSION(ISCSI_TRANSPORT_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_ISCSI);
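/*
 * A minimal sketch (not part of scsi_transport_iscsi.c) of how a low-level
 * driver might register with the transport class implemented above. All
 * "example_" names are hypothetical, and the callback table is left mostly
 * empty; a real driver must wire up the session/connection callbacks
 * (create_session, bind_conn, start_conn, send_pdu, attr_is_visible, ...)
 * that the netlink handlers above invoke.
 */
#include <linux/module.h>
#include <scsi/scsi_transport_iscsi.h>

static struct scsi_transport_template *example_stt;

static struct iscsi_transport example_iscsi_transport = {
	.owner	= THIS_MODULE,
	.name	= "example_iscsi",
	/*
	 * .create_session, .destroy_session, .create_conn, .bind_conn,
	 * .start_conn, .stop_conn, .send_pdu, .get_conn_param,
	 * .attr_is_visible, ... would be filled in by a real driver.
	 */
};

static int __init example_iscsi_init(void)
{
	/* Registers the transport device and its sysfs attribute containers. */
	example_stt = iscsi_register_transport(&example_iscsi_transport);
	if (!example_stt)
		return -ENOMEM;
	return 0;
}

static void __exit example_iscsi_exit(void)
{
	iscsi_unregister_transport(&example_iscsi_transport);
}

module_init(example_iscsi_init);
module_exit(example_iscsi_exit);
MODULE_LICENSE("GPL");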
/* * Copyright © 2008 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice
and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Authors: * Eric Anholt <eric@anholt.net> * */ #include <linux/dma-buf.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/iosys-map.h> #include <linux/mem_encrypt.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include <linux/types.h> #include <linux/uaccess.h> #include <drm/drm.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> #include <drm/drm_vma_manager.h> #include "drm_internal.h" /** @file drm_gem.c * * This file provides some of the base ioctls and library routines for * the graphics memory manager implemented by each device driver. * * Because various devices have different requirements in terms of * synchronization and migration strategies, implementing that is left up to * the driver, and all that the general API provides should be generic -- * allocating objects, reading/writing data with the cpu, freeing objects. * Even there, platform-dependent optimizations for reading/writing data with * the CPU mean we'll likely hook those out to driver-specific calls. However, * the DRI2 implementation wants to have at least allocate/mmap be generic. * * The goal was to have swap-backed object allocation managed through * struct file. However, file descriptors as handles to a struct file have * two major failings: * - Process limits prevent more than 1024 or so being used at a time by * default. * - Inability to allocate high fds will aggravate the X Server's select() * handling, and likely that of many GL client applications as well. * * This led to a plan of using our own integer IDs (called handles, following * DRM terminology) to mimic fds, and implement the fd syscalls we need as * ioctls. The objects themselves will still include the struct file so * that we can transition to fds if the required kernel infrastructure shows * up at a later date, and as our interface with shmfs for memory allocation. 
*/ static void drm_gem_init_release(struct drm_device *dev, void *ptr) { drm_vma_offset_manager_destroy(dev->vma_offset_manager); } /** * drm_gem_init - Initialize the GEM device fields * @dev: drm_devic structure to initialize */ int drm_gem_init(struct drm_device *dev) { struct drm_vma_offset_manager *vma_offset_manager; mutex_init(&dev->object_name_lock); idr_init_base(&dev->object_name_idr, 1); vma_offset_manager = drmm_kzalloc(dev, sizeof(*vma_offset_manager), GFP_KERNEL); if (!vma_offset_manager) { DRM_ERROR("out of memory\n"); return -ENOMEM; } dev->vma_offset_manager = vma_offset_manager; drm_vma_offset_manager_init(vma_offset_manager, DRM_FILE_PAGE_OFFSET_START, DRM_FILE_PAGE_OFFSET_SIZE); return drmm_add_action(dev, drm_gem_init_release, NULL); } /** * drm_gem_object_init - initialize an allocated shmem-backed GEM object * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size * * Initialize an already allocated GEM object of the specified size with * shmfs backing store. */ int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size) { struct file *filp; drm_gem_private_object_init(dev, obj, size); filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); if (IS_ERR(filp)) return PTR_ERR(filp); obj->filp = filp; return 0; } EXPORT_SYMBOL(drm_gem_object_init); /** * drm_gem_private_object_init - initialize an allocated private GEM object * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size * * Initialize an already allocated GEM object of the specified size with * no GEM provided backing store. Instead the caller is responsible for * backing the object and handling it. */ void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size) { BUG_ON((size & (PAGE_SIZE - 1)) != 0); obj->dev = dev; obj->filp = NULL; kref_init(&obj->refcount); obj->handle_count = 0; obj->size = size; dma_resv_init(&obj->_resv); if (!obj->resv) obj->resv = &obj->_resv; if (drm_core_check_feature(dev, DRIVER_GEM_GPUVA)) drm_gem_gpuva_init(obj); drm_vma_node_reset(&obj->vma_node); INIT_LIST_HEAD(&obj->lru_node); } EXPORT_SYMBOL(drm_gem_private_object_init); /** * drm_gem_private_object_fini - Finalize a failed drm_gem_object * @obj: drm_gem_object * * Uninitialize an already allocated GEM object when it initialized failed */ void drm_gem_private_object_fini(struct drm_gem_object *obj) { WARN_ON(obj->dma_buf); dma_resv_fini(&obj->_resv); } EXPORT_SYMBOL(drm_gem_private_object_fini); /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. * * Called after the last handle to the object has been closed * * Removes any name for the object. Note that this must be * called before drm_gem_object_free or we'll be touching * freed memory */ static void drm_gem_object_handle_free(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; /* Remove any name for this object */ if (obj->name) { idr_remove(&dev->object_name_idr, obj->name); obj->name = 0; } } static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) { /* Unbreak the reference cycle if we have an exported dma_buf. 
*/ if (obj->dma_buf) { dma_buf_put(obj->dma_buf); obj->dma_buf = NULL; } } static void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { drm_gem_object_handle_free(obj); drm_gem_object_exported_dma_buf_free(obj); final = true; } mutex_unlock(&dev->object_name_lock); if (final) drm_gem_object_put(obj); } /* * Called at device or object close to release the file's * handle references on objects. */ static int drm_gem_object_release_handle(int id, void *ptr, void *data) { struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; if (obj->funcs->close) obj->funcs->close(obj, file_priv); drm_prime_remove_buf_handle(&file_priv->prime, id); drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); return 0; } /** * drm_gem_handle_delete - deletes the given file-private handle * @filp: drm file-private structure to use for the handle look up * @handle: userspace handle to delete * * Removes the GEM handle from the @filp lookup table which has been added with * drm_gem_handle_create(). If this is the last handle also cleans up linked * resources like GEM names. */ int drm_gem_handle_delete(struct drm_file *filp, u32 handle) { struct drm_gem_object *obj; spin_lock(&filp->table_lock); /* Check if we currently have a reference on the object */ obj = idr_replace(&filp->object_idr, NULL, handle); spin_unlock(&filp->table_lock); if (IS_ERR_OR_NULL(obj)) return -EINVAL; /* Release driver's reference and decrement refcount. */ drm_gem_object_release_handle(handle, obj, filp); /* And finally make the handle available for future allocations. */ spin_lock(&filp->table_lock); idr_remove(&filp->object_idr, handle); spin_unlock(&filp->table_lock); return 0; } EXPORT_SYMBOL(drm_gem_handle_delete); /** * drm_gem_dumb_map_offset - return the fake mmap offset for a gem object * @file: drm file-private structure containing the gem object * @dev: corresponding drm_device * @handle: gem object handle * @offset: return location for the fake mmap offset * * This implements the &drm_driver.dumb_map_offset kms driver callback for * drivers which use gem to manage their backing storage. * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset) { struct drm_gem_object *obj; int ret; obj = drm_gem_object_lookup(file, handle); if (!obj) return -ENOENT; /* Don't allow imported objects to be mapped */ if (obj->import_attach) { ret = -EINVAL; goto out; } ret = drm_gem_create_mmap_offset(obj); if (ret) goto out; *offset = drm_vma_node_offset_addr(&obj->vma_node); out: drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL_GPL(drm_gem_dumb_map_offset); /** * drm_gem_handle_create_tail - internal functions to create a handle * @file_priv: drm file-private structure to register the handle for * @obj: object to register * @handlep: pointer to return the created handle to the caller * * This expects the &drm_device.object_name_lock to be held already and will * drop it before returning. Used to avoid races in establishing new handles * when importing an object from either an flink name or a dma-buf. 
* * Handles must be release again through drm_gem_handle_delete(). This is done * when userspace closes @file_priv for all attached handles, or through the * GEM_CLOSE ioctl for individual handles. */ int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep) { struct drm_device *dev = obj->dev; u32 handle; int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); if (obj->handle_count++ == 0) drm_gem_object_get(obj); /* * Get the user-visible handle using idr. Preload and perform * allocation under our spinlock. */ idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); mutex_unlock(&dev->object_name_lock); if (ret < 0) goto err_unref; handle = ret; ret = drm_vma_node_allow(&obj->vma_node, file_priv); if (ret) goto err_remove; if (obj->funcs->open) { ret = obj->funcs->open(obj, file_priv); if (ret) goto err_revoke; } *handlep = handle; return 0; err_revoke: drm_vma_node_revoke(&obj->vma_node, file_priv); err_remove: spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); spin_unlock(&file_priv->table_lock); err_unref: drm_gem_object_handle_put_unlocked(obj); return ret; } /** * drm_gem_handle_create - create a gem handle for an object * @file_priv: drm file-private structure to register the handle for * @obj: object to register * @handlep: pointer to return the created handle to the caller * * Create a handle for this object. This adds a handle reference to the object, * which includes a regular reference count. Callers will likely want to * dereference the object afterwards. * * Since this publishes @obj to userspace it must be fully set up by this point, * drivers must call this last in their buffer object creation callbacks. */ int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep) { mutex_lock(&obj->dev->object_name_lock); return drm_gem_handle_create_tail(file_priv, obj, handlep); } EXPORT_SYMBOL(drm_gem_handle_create); /** * drm_gem_free_mmap_offset - release a fake mmap offset for an object * @obj: obj in question * * This routine frees fake offsets allocated by drm_gem_create_mmap_offset(). * * Note that drm_gem_object_release() already calls this function, so drivers * don't have to take care of releasing the mmap offset themselves when freeing * the GEM object. */ void drm_gem_free_mmap_offset(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; drm_vma_offset_remove(dev->vma_offset_manager, &obj->vma_node); } EXPORT_SYMBOL(drm_gem_free_mmap_offset); /** * drm_gem_create_mmap_offset_size - create a fake mmap offset for an object * @obj: obj in question * @size: the virtual size * * GEM memory mapping works by handing back to userspace a fake mmap offset * it can use in a subsequent mmap(2) call. The DRM core code then looks * up the object based on the offset and sets up the various memory mapping * structures. * * This routine allocates and attaches a fake offset for @obj, in cases where * the virtual size differs from the physical size (ie. &drm_gem_object.size). * Otherwise just use drm_gem_create_mmap_offset(). * * This function is idempotent and handles an already allocated mmap offset * transparently. Drivers do not need to check for this case. 
*/ int drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size) { struct drm_device *dev = obj->dev; return drm_vma_offset_add(dev->vma_offset_manager, &obj->vma_node, size / PAGE_SIZE); } EXPORT_SYMBOL(drm_gem_create_mmap_offset_size); /** * drm_gem_create_mmap_offset - create a fake mmap offset for an object * @obj: obj in question * * GEM memory mapping works by handing back to userspace a fake mmap offset * it can use in a subsequent mmap(2) call. The DRM core code then looks * up the object based on the offset and sets up the various memory mapping * structures. * * This routine allocates and attaches a fake offset for @obj. * * Drivers can call drm_gem_free_mmap_offset() before freeing @obj to release * the fake offset again. */ int drm_gem_create_mmap_offset(struct drm_gem_object *obj) { return drm_gem_create_mmap_offset_size(obj, obj->size); } EXPORT_SYMBOL(drm_gem_create_mmap_offset); /* * Move folios to appropriate lru and release the folios, decrementing the * ref count of those folios. */ static void drm_gem_check_release_batch(struct folio_batch *fbatch) { check_move_unevictable_folios(fbatch); __folio_batch_release(fbatch); cond_resched(); } /** * drm_gem_get_pages - helper to allocate backing pages for a GEM object * from shmem * @obj: obj in question * * This reads the page-array of the shmem-backing storage of the given gem * object. An array of pages is returned. If a page is not allocated or * swapped-out, this will allocate/swap-in the required pages. Note that the * whole object is covered by the page-array and pinned in memory. * * Use drm_gem_put_pages() to release the array and unpin all pages. * * This uses the GFP-mask set on the shmem-mapping (see mapping_set_gfp_mask()). * If you require other GFP-masks, you have to do those allocations yourself. * * Note that you are not allowed to change gfp-zones during runtime. That is, * shmem_read_mapping_page_gfp() must be called with the same gfp_zone(gfp) as * set during initialization. If you have special zone constraints, set them * after drm_gem_object_init() via mapping_set_gfp_mask(). shmem-core takes care * to keep pages in the required zone during swap-in. * * This function is only valid on objects initialized with * drm_gem_object_init(), but not for those initialized with * drm_gem_private_object_init() only. */ struct page **drm_gem_get_pages(struct drm_gem_object *obj) { struct address_space *mapping; struct page **pages; struct folio *folio; struct folio_batch fbatch; long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); /* This is the shared memory object that backs the GEM resource */ mapping = obj->filp->f_mapping; /* We already BUG_ON() for non-page-aligned sizes in * drm_gem_object_init(), so we should never hit this unless * driver author is doing something really wrong: */ WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0); npages = obj->size >> PAGE_SHIFT; pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (pages == NULL) return ERR_PTR(-ENOMEM); mapping_set_unevictable(mapping); i = 0; while (i < npages) { long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) goto fail; nr = min(npages - i, folio_nr_pages(folio)); for (j = 0; j < nr; j++, i++) pages[i] = folio_file_page(folio, i); /* Make sure shmem keeps __GFP_DMA32 allocated pages in the * correct region during swapin. Note that this requires * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) * so shmem can relocate pages during swapin if required. 
*/ BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) && (folio_pfn(folio) >= 0x00100000UL)); } return pages; fail: mapping_clear_unevictable(mapping); folio_batch_init(&fbatch); j = 0; while (j < i) { struct folio *f = page_folio(pages[j]); if (!folio_batch_add(&fbatch, f)) drm_gem_check_release_batch(&fbatch); j += folio_nr_pages(f); } if (fbatch.nr) drm_gem_check_release_batch(&fbatch); kvfree(pages); return ERR_CAST(folio); } EXPORT_SYMBOL(drm_gem_get_pages); /** * drm_gem_put_pages - helper to free backing pages for a GEM object * @obj: obj in question * @pages: pages to free * @dirty: if true, pages will be marked as dirty * @accessed: if true, the pages will be marked as accessed */ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, bool dirty, bool accessed) { int i, npages; struct address_space *mapping; struct folio_batch fbatch; mapping = file_inode(obj->filp)->i_mapping; mapping_clear_unevictable(mapping); /* We already BUG_ON() for non-page-aligned sizes in * drm_gem_object_init(), so we should never hit this unless * driver author is doing something really wrong: */ WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0); npages = obj->size >> PAGE_SHIFT; folio_batch_init(&fbatch); for (i = 0; i < npages; i++) { struct folio *folio; if (!pages[i]) continue; folio = page_folio(pages[i]); if (dirty) folio_mark_dirty(folio); if (accessed) folio_mark_accessed(folio); /* Undo the reference we took when populating the table */ if (!folio_batch_add(&fbatch, folio)) drm_gem_check_release_batch(&fbatch); i += folio_nr_pages(folio) - 1; } if (folio_batch_count(&fbatch)) drm_gem_check_release_batch(&fbatch); kvfree(pages); } EXPORT_SYMBOL(drm_gem_put_pages); static int objects_lookup(struct drm_file *filp, u32 *handle, int count, struct drm_gem_object **objs) { int i, ret = 0; struct drm_gem_object *obj; spin_lock(&filp->table_lock); for (i = 0; i < count; i++) { /* Check if we currently have a reference on the object */ obj = idr_find(&filp->object_idr, handle[i]); if (!obj) { ret = -ENOENT; break; } drm_gem_object_get(obj); objs[i] = obj; } spin_unlock(&filp->table_lock); return ret; } /** * drm_gem_objects_lookup - look up GEM objects from an array of handles * @filp: DRM file private date * @bo_handles: user pointer to array of userspace handle * @count: size of handle array * @objs_out: returned pointer to array of drm_gem_object pointers * * Takes an array of userspace handles and returns a newly allocated array of * GEM objects. * * For a single handle lookup, use drm_gem_object_lookup(). * * Returns: * * @objs filled in with GEM object pointers. Returned GEM objects need to be * released with drm_gem_object_put(). -ENOENT is returned on a lookup * failure. 0 is returned on success. 
* */ int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, int count, struct drm_gem_object ***objs_out) { int ret; u32 *handles; struct drm_gem_object **objs; if (!count) return 0; objs = kvmalloc_array(count, sizeof(struct drm_gem_object *), GFP_KERNEL | __GFP_ZERO); if (!objs) return -ENOMEM; *objs_out = objs; handles = kvmalloc_array(count, sizeof(u32), GFP_KERNEL); if (!handles) { ret = -ENOMEM; goto out; } if (copy_from_user(handles, bo_handles, count * sizeof(u32))) { ret = -EFAULT; DRM_DEBUG("Failed to copy in GEM handles\n"); goto out; } ret = objects_lookup(filp, handles, count, objs); out: kvfree(handles); return ret; } EXPORT_SYMBOL(drm_gem_objects_lookup); /** * drm_gem_object_lookup - look up a GEM object from its handle * @filp: DRM file private date * @handle: userspace handle * * Returns: * * A reference to the object named by the handle if such exists on @filp, NULL * otherwise. * * If looking up an array of handles, use drm_gem_objects_lookup(). */ struct drm_gem_object * drm_gem_object_lookup(struct drm_file *filp, u32 handle) { struct drm_gem_object *obj = NULL; objects_lookup(filp, &handle, 1, &obj); return obj; } EXPORT_SYMBOL(drm_gem_object_lookup); /** * drm_gem_dma_resv_wait - Wait on GEM object's reservation's objects * shared and/or exclusive fences. * @filep: DRM file private date * @handle: userspace handle * @wait_all: if true, wait on all fences, else wait on just exclusive fence * @timeout: timeout value in jiffies or zero to return immediately * * Returns: * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than 0 on success. */ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, bool wait_all, unsigned long timeout) { long ret; struct drm_gem_object *obj; obj = drm_gem_object_lookup(filep, handle); if (!obj) { DRM_DEBUG("Failed to look up GEM BO %d\n", handle); return -EINVAL; } ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all), true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) ret = 0; drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_dma_resv_wait); /** * drm_gem_close_ioctl - implementation of the GEM_CLOSE ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Releases the handle to an mm object. */ int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_close *args = data; int ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; ret = drm_gem_handle_delete(file_priv, args->handle); return ret; } /** * drm_gem_flink_ioctl - implementation of the GEM_FLINK ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Create a global name for an object, returning the name. * * Note that the name does not hold a reference; when the object * is freed, the name goes away. */ int drm_gem_flink_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_flink *args = data; struct drm_gem_object *obj; int ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; obj = drm_gem_object_lookup(file_priv, args->handle); if (obj == NULL) return -ENOENT; mutex_lock(&dev->object_name_lock); /* prevent races with concurrent gem_close. 
*/ if (obj->handle_count == 0) { ret = -ENOENT; goto err; } if (!obj->name) { ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_KERNEL); if (ret < 0) goto err; obj->name = ret; } args->name = (uint64_t) obj->name; ret = 0; err: mutex_unlock(&dev->object_name_lock); drm_gem_object_put(obj); return ret; } /** * drm_gem_open_ioctl - implementation of the GEM_OPEN ioctl * @dev: drm_device * @data: ioctl data * @file_priv: drm file-private structure * * Open an object using the global name, returning a handle and the size. * * This handle (of course) holds a reference to the object, so the object * will not go away until the handle is deleted. */ int drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_open *args = data; struct drm_gem_object *obj; int ret; u32 handle; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; mutex_lock(&dev->object_name_lock); obj = idr_find(&dev->object_name_idr, (int) args->name); if (obj) { drm_gem_object_get(obj); } else { mutex_unlock(&dev->object_name_lock); return -ENOENT; } /* drm_gem_handle_create_tail unlocks dev->object_name_lock. */ ret = drm_gem_handle_create_tail(file_priv, obj, &handle); if (ret) goto err; args->handle = handle; args->size = obj->size; err: drm_gem_object_put(obj); return ret; } /** * drm_gem_open - initializes GEM file-private structures at devnode open time * @dev: drm_device which is being opened by userspace * @file_private: drm file-private structure to set up * * Called at device open time, sets up the structure for handling refcounting * of mm objects. */ void drm_gem_open(struct drm_device *dev, struct drm_file *file_private) { idr_init_base(&file_private->object_idr, 1); spin_lock_init(&file_private->table_lock); } /** * drm_gem_release - release file-private GEM resources * @dev: drm_device which is being closed by userspace * @file_private: drm file-private structure to clean up * * Called at close time when the filp is going away. * * Releases any remaining references on objects by this filp. */ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private) { idr_for_each(&file_private->object_idr, &drm_gem_object_release_handle, file_private); idr_destroy(&file_private->object_idr); } /** * drm_gem_object_release - release GEM buffer object resources * @obj: GEM buffer object * * This releases any structures and resources used by @obj and is the inverse of * drm_gem_object_init(). */ void drm_gem_object_release(struct drm_gem_object *obj) { if (obj->filp) fput(obj->filp); drm_gem_private_object_fini(obj); drm_gem_free_mmap_offset(obj); drm_gem_lru_remove(obj); } EXPORT_SYMBOL(drm_gem_object_release); /** * drm_gem_object_free - free a GEM object * @kref: kref of the object to free * * Called after the last reference to the object has been lost. * * Frees the object */ void drm_gem_object_free(struct kref *kref) { struct drm_gem_object *obj = container_of(kref, struct drm_gem_object, refcount); if (WARN_ON(!obj->funcs->free)) return; obj->funcs->free(obj); } EXPORT_SYMBOL(drm_gem_object_free); /** * drm_gem_vm_open - vma->ops->open implementation for GEM * @vma: VM area structure * * This function implements the #vm_operations_struct open() callback for GEM * drivers. This must be used together with drm_gem_vm_close(). 
*/ void drm_gem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_get(obj); } EXPORT_SYMBOL(drm_gem_vm_open); /** * drm_gem_vm_close - vma->ops->close implementation for GEM * @vma: VM area structure * * This function implements the #vm_operations_struct close() callback for GEM * drivers. This must be used together with drm_gem_vm_open(). */ void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_put(obj); } EXPORT_SYMBOL(drm_gem_vm_close); /** * drm_gem_mmap_obj - memory map a GEM object * @obj: the GEM object to map * @obj_size: the object size to be mapped, in bytes * @vma: VMA for the area to be mapped * * Set up the VMA to prepare mapping of the GEM object using the GEM object's * vm_ops. Depending on their requirements, GEM objects can either * provide a fault handler in their vm_ops (in which case any accesses to * the object will be trapped, to perform migration, GTT binding, surface * register allocation, or performance monitoring), or mmap the buffer memory * synchronously after calling drm_gem_mmap_obj. * * This function is mainly intended to implement the DMABUF mmap operation, when * the GEM object is not looked up based on its fake offset. To implement the * DRM mmap operation, drivers should use the drm_gem_mmap() function. * * drm_gem_mmap_obj() assumes the user is granted access to the buffer while * drm_gem_mmap() prevents unprivileged users from mapping random objects. So * callers must verify access restrictions before calling this helper. * * Return 0 or success or -EINVAL if the object size is smaller than the VMA * size, or if no vm_ops are provided. */ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma) { int ret; /* Check for valid size. */ if (obj_size < vma->vm_end - vma->vm_start) return -EINVAL; /* Take a ref for this mapping of the object, so that the fault * handler can dereference the mmap offset's pointer to the object. * This reference is cleaned up by the corresponding vm_close * (which should happen whether the vma was created by this call, or * by a vm_open due to mremap or partial unmap or whatever). */ drm_gem_object_get(obj); vma->vm_private_data = obj; vma->vm_ops = obj->funcs->vm_ops; if (obj->funcs->mmap) { ret = obj->funcs->mmap(obj, vma); if (ret) goto err_drm_gem_object_put; WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); } else { if (!vma->vm_ops) { ret = -EINVAL; goto err_drm_gem_object_put; } vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); } return 0; err_drm_gem_object_put: drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_mmap_obj); /** * drm_gem_mmap - memory map routine for GEM objects * @filp: DRM file pointer * @vma: VMA for the area to be mapped * * If a driver supports GEM object mapping, mmap calls on the DRM file * descriptor will end up here. * * Look up the GEM object based on the offset passed in (vma->vm_pgoff will * contain the fake offset we created when the GTT map ioctl was called on * the object) and map it with a call to drm_gem_mmap_obj(). * * If the caller is not granted access to the buffer object, the mmap will fail * with EACCES. Please see the vma manager for more information. 
*/ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_file *priv = filp->private_data; struct drm_device *dev = priv->minor->dev; struct drm_gem_object *obj = NULL; struct drm_vma_offset_node *node; int ret; if (drm_dev_is_unplugged(dev)) return -ENODEV; drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, vma->vm_pgoff, vma_pages(vma)); if (likely(node)) { obj = container_of(node, struct drm_gem_object, vma_node); /* * When the object is being freed, after it hits 0-refcnt it * proceeds to tear down the object. In the process it will * attempt to remove the VMA offset and so acquire this * mgr->vm_lock. Therefore if we find an object with a 0-refcnt * that matches our range, we know it is in the process of being * destroyed and will be freed as soon as we release the lock - * so we have to check for the 0-refcnted object and treat it as * invalid. */ if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); if (!obj) return -EINVAL; if (!drm_vma_node_is_allowed(node, priv)) { drm_gem_object_put(obj); return -EACCES; } ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma); drm_gem_object_put(obj); return ret; } EXPORT_SYMBOL(drm_gem_mmap); void drm_gem_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj) { drm_printf_indent(p, indent, "name=%d\n", obj->name); drm_printf_indent(p, indent, "refcount=%u\n", kref_read(&obj->refcount)); drm_printf_indent(p, indent, "start=%08lx\n", drm_vma_node_start(&obj->vma_node)); drm_printf_indent(p, indent, "size=%zu\n", obj->size); drm_printf_indent(p, indent, "imported=%s\n", str_yes_no(obj->import_attach)); if (obj->funcs->print_info) obj->funcs->print_info(p, indent, obj); } int drm_gem_pin(struct drm_gem_object *obj) { if (obj->funcs->pin) return obj->funcs->pin(obj); return 0; } void drm_gem_unpin(struct drm_gem_object *obj) { if (obj->funcs->unpin) obj->funcs->unpin(obj); } int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) { int ret; dma_resv_assert_held(obj->resv); if (!obj->funcs->vmap) return -EOPNOTSUPP; ret = obj->funcs->vmap(obj, map); if (ret) return ret; else if (iosys_map_is_null(map)) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_gem_vmap); void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_assert_held(obj->resv); if (iosys_map_is_null(map)) return; if (obj->funcs->vunmap) obj->funcs->vunmap(obj, map); /* Always set the mapping to NULL. Callers may rely on this. */ iosys_map_clear(map); } EXPORT_SYMBOL(drm_gem_vunmap); int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) { int ret; dma_resv_lock(obj->resv, NULL); ret = drm_gem_vmap(obj, map); dma_resv_unlock(obj->resv); return ret; } EXPORT_SYMBOL(drm_gem_vmap_unlocked); void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_lock(obj->resv, NULL); drm_gem_vunmap(obj, map); dma_resv_unlock(obj->resv); } EXPORT_SYMBOL(drm_gem_vunmap_unlocked); /** * drm_gem_lock_reservations - Sets up the ww context and acquires * the lock on an array of GEM objects. * * Once you've locked your reservations, you'll want to set up space * for your shared fences (if applicable), submit your job, then * drm_gem_unlock_reservations(). 
* * @objs: drm_gem_objects to lock * @count: Number of objects in @objs * @acquire_ctx: struct ww_acquire_ctx that will be initialized as * part of tracking this set of locked reservations. */ int drm_gem_lock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx) { int contended = -1; int i, ret; ww_acquire_init(acquire_ctx, &reservation_ww_class); retry: if (contended != -1) { struct drm_gem_object *obj = objs[contended]; ret = dma_resv_lock_slow_interruptible(obj->resv, acquire_ctx); if (ret) { ww_acquire_fini(acquire_ctx); return ret; } } for (i = 0; i < count; i++) { if (i == contended) continue; ret = dma_resv_lock_interruptible(objs[i]->resv, acquire_ctx); if (ret) { int j; for (j = 0; j < i; j++) dma_resv_unlock(objs[j]->resv); if (contended != -1 && contended >= i) dma_resv_unlock(objs[contended]->resv); if (ret == -EDEADLK) { contended = i; goto retry; } ww_acquire_fini(acquire_ctx); return ret; } } ww_acquire_done(acquire_ctx); return 0; } EXPORT_SYMBOL(drm_gem_lock_reservations); void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx) { int i; for (i = 0; i < count; i++) dma_resv_unlock(objs[i]->resv); ww_acquire_fini(acquire_ctx); } EXPORT_SYMBOL(drm_gem_unlock_reservations); /** * drm_gem_lru_init - initialize a LRU * * @lru: The LRU to initialize * @lock: The lock protecting the LRU */ void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock) { lru->lock = lock; lru->count = 0; INIT_LIST_HEAD(&lru->list); } EXPORT_SYMBOL(drm_gem_lru_init); static void drm_gem_lru_remove_locked(struct drm_gem_object *obj) { obj->lru->count -= obj->size >> PAGE_SHIFT; WARN_ON(obj->lru->count < 0); list_del(&obj->lru_node); obj->lru = NULL; } /** * drm_gem_lru_remove - remove object from whatever LRU it is in * * If the object is currently in any LRU, remove it. * * @obj: The GEM object to remove from current LRU */ void drm_gem_lru_remove(struct drm_gem_object *obj) { struct drm_gem_lru *lru = obj->lru; if (!lru) return; mutex_lock(lru->lock); drm_gem_lru_remove_locked(obj); mutex_unlock(lru->lock); } EXPORT_SYMBOL(drm_gem_lru_remove); /** * drm_gem_lru_move_tail_locked - move the object to the tail of the LRU * * Like &drm_gem_lru_move_tail but lru lock must be held * * @lru: The LRU to move the object into. * @obj: The GEM object to move into this LRU */ void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj) { lockdep_assert_held_once(lru->lock); if (obj->lru) drm_gem_lru_remove_locked(obj); lru->count += obj->size >> PAGE_SHIFT; list_add_tail(&obj->lru_node, &lru->list); obj->lru = lru; } EXPORT_SYMBOL(drm_gem_lru_move_tail_locked); /** * drm_gem_lru_move_tail - move the object to the tail of the LRU * * If the object is already in this LRU it will be moved to the * tail. Otherwise it will be removed from whichever other LRU * it is in (if any) and moved into this LRU. * * @lru: The LRU to move the object into. * @obj: The GEM object to move into this LRU */ void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj) { mutex_lock(lru->lock); drm_gem_lru_move_tail_locked(lru, obj); mutex_unlock(lru->lock); } EXPORT_SYMBOL(drm_gem_lru_move_tail); /** * drm_gem_lru_scan - helper to implement shrinker.scan_objects * * If the shrink callback succeeds, it is expected that the driver * move the object out of this LRU. * * If the LRU possibly contain active buffers, it is the responsibility * of the shrink callback to check for this (ie. 
dma_resv_test_signaled()) * or if necessary block until the buffer becomes idle. * * @lru: The LRU to scan * @nr_to_scan: The number of pages to try to reclaim * @remaining: The number of pages left to reclaim, should be initialized by caller * @shrink: Callback to try to shrink/reclaim the object. */ unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)) { struct drm_gem_lru still_in_lru; struct drm_gem_object *obj; unsigned freed = 0; drm_gem_lru_init(&still_in_lru, lru->lock); mutex_lock(lru->lock); while (freed < nr_to_scan) { obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node); if (!obj) break; drm_gem_lru_move_tail_locked(&still_in_lru, obj); /* * If it's in the process of being freed, gem_object->free() * may be blocked on lock waiting to remove it. So just * skip it. */ if (!kref_get_unless_zero(&obj->refcount)) continue; /* * Now that we own a reference, we can drop the lock for the * rest of the loop body, to reduce contention with other * code paths that need the LRU lock */ mutex_unlock(lru->lock); /* * Note that this still needs to be trylock, since we can * hit shrinker in response to trying to get backing pages * for this obj (ie. while it's lock is already held) */ if (!dma_resv_trylock(obj->resv)) { *remaining += obj->size >> PAGE_SHIFT; goto tail; } if (shrink(obj)) { freed += obj->size >> PAGE_SHIFT; /* * If we succeeded in releasing the object's backing * pages, we expect the driver to have moved the object * out of this LRU */ WARN_ON(obj->lru == &still_in_lru); WARN_ON(obj->lru == lru); } dma_resv_unlock(obj->resv); tail: drm_gem_object_put(obj); mutex_lock(lru->lock); } /* * Move objects we've skipped over out of the temporary still_in_lru * back into this LRU */ list_for_each_entry (obj, &still_in_lru.list, lru_node) obj->lru = lru; list_splice_tail(&still_in_lru.list, &lru->list); lru->count += still_in_lru.count; mutex_unlock(lru->lock); return freed; } EXPORT_SYMBOL(drm_gem_lru_scan); /** * drm_gem_evict - helper to evict backing pages for a GEM object * @obj: obj in question */ int drm_gem_evict(struct drm_gem_object *obj) { dma_resv_assert_held(obj->resv); if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)) return -EBUSY; if (obj->funcs->evict) return obj->funcs->evict(obj); return 0; } EXPORT_SYMBOL(drm_gem_evict); |
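/*
 * Example (editor's illustrative sketch, not part of drm_gem.c): how a driver
 * might use the helpers above in a buffer-object create ioctl. Everything
 * prefixed with my_ (the BO struct, the funcs table, the ioctl argument
 * struct) is hypothetical; the DRM calls are the documented helpers. As the
 * drm_gem_handle_create() documentation says, the handle is published only
 * once the object is fully set up, and the creation reference is dropped
 * right afterwards so the handle holds the remaining reference.
 */
struct my_create_args {			/* hypothetical ioctl payload */
	__u64 size;
	__u32 handle;
	__u32 pad;
};

struct my_driver_bo {			/* hypothetical driver BO */
	struct drm_gem_object base;
};

static void my_driver_bo_free(struct drm_gem_object *obj)
{
	struct my_driver_bo *bo = container_of(obj, struct my_driver_bo, base);

	/* Inverse of drm_gem_object_init(): drops obj->filp, mmap offset, LRU. */
	drm_gem_object_release(obj);
	kfree(bo);
}

static const struct drm_gem_object_funcs my_driver_gem_funcs = {
	.free = my_driver_bo_free,
};

static int my_driver_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file_priv)
{
	struct my_create_args *args = data;
	struct my_driver_bo *bo;
	u32 handle;
	int ret;

	if (!args->size)
		return -EINVAL;

	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
	if (!bo)
		return -ENOMEM;
	bo->base.funcs = &my_driver_gem_funcs;

	ret = drm_gem_object_init(dev, &bo->base, PAGE_ALIGN(args->size));
	if (ret) {
		kfree(bo);
		return ret;
	}

	/* Publish to userspace last, once the object is fully initialized. */
	ret = drm_gem_handle_create(file_priv, &bo->base, &handle);
	/* Drop the creation reference; the handle now keeps the object alive. */
	drm_gem_object_put(&bo->base);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}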
// SPDX-License-Identifier: GPL-2.0-only
/*
 * LED support for the input layer
 *
 * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/leds.h>
#include <linux/input.h>

#if IS_ENABLED(CONFIG_VT)
#define VT_TRIGGER(_name)	.trigger = _name
#else
#define VT_TRIGGER(_name)	.trigger = NULL
#endif

static const struct {
	const char *name;
	const char *trigger;
} input_led_info[LED_CNT] = {
	[LED_NUML]	= { "numlock", VT_TRIGGER("kbd-numlock") },
	[LED_CAPSL]	= { "capslock", VT_TRIGGER("kbd-capslock") },
	[LED_SCROLLL]	= { "scrolllock", VT_TRIGGER("kbd-scrolllock") },
	[LED_COMPOSE]	= { "compose" },
	[LED_KANA]	= { "kana", VT_TRIGGER("kbd-kanalock") },
	[LED_SLEEP]	= { "sleep" },
	[LED_SUSPEND]	= { "suspend" },
	[LED_MUTE]	= { "mute" },
	[LED_MISC]	= { "misc" },
	[LED_MAIL]	= { "mail" },
	[LED_CHARGING]	= { "charging" },
};

struct input_led {
	struct led_classdev cdev;
	struct input_handle *handle;
	unsigned int code; /* One of LED_* constants */
};

struct input_leds {
	struct input_handle handle;
	unsigned int num_leds;
	struct input_led leds[] __counted_by(num_leds);
};

static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev)
{
	struct input_led *led = container_of(cdev, struct input_led, cdev);
	struct input_dev *input = led->handle->dev;

	return test_bit(led->code, input->led) ? cdev->max_brightness : 0;
}

static void input_leds_brightness_set(struct led_classdev *cdev,
				      enum led_brightness brightness)
{
	struct input_led *led = container_of(cdev, struct input_led, cdev);

	input_inject_event(led->handle, EV_LED, led->code, !!brightness);
}

static void input_leds_event(struct input_handle *handle, unsigned int type,
			     unsigned int code, int value)
{
}

static int input_leds_get_count(struct input_dev *dev)
{
	unsigned int led_code;
	int count = 0;

	for_each_set_bit(led_code, dev->ledbit, LED_CNT)
		if (input_led_info[led_code].name)
			count++;

	return count;
}

static int input_leds_connect(struct input_handler *handler,
			      struct input_dev *dev,
			      const struct input_device_id *id)
{
	struct input_leds *leds;
	struct input_led *led;
	unsigned int num_leds;
	unsigned int led_code;
	int led_no;
	int error;

	num_leds = input_leds_get_count(dev);
	if (!num_leds)
		return -ENXIO;

	leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL);
	if (!leds)
		return -ENOMEM;

	leds->num_leds = num_leds;

	leds->handle.dev = dev;
	leds->handle.handler = handler;
	leds->handle.name = "leds";
	leds->handle.private = leds;

	error = input_register_handle(&leds->handle);
	if (error)
		goto err_free_mem;

	error = input_open_device(&leds->handle);
	if (error)
		goto err_unregister_handle;

	led_no = 0;
	for_each_set_bit(led_code, dev->ledbit, LED_CNT) {
		if (!input_led_info[led_code].name)
			continue;

		led = &leds->leds[led_no];
		led->handle = &leds->handle;
		led->code = led_code;

		led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s",
					   dev_name(&dev->dev),
					   input_led_info[led_code].name);
		if (!led->cdev.name) {
			error = -ENOMEM;
			goto err_unregister_leds;
		}

		led->cdev.max_brightness = 1;
		led->cdev.brightness_get = input_leds_brightness_get;
		led->cdev.brightness_set = input_leds_brightness_set;
		led->cdev.default_trigger = input_led_info[led_code].trigger;

		error = led_classdev_register(&dev->dev, &led->cdev);
		if (error) {
			dev_err(&dev->dev, "failed to register LED %s: %d\n",
				led->cdev.name, error);
			kfree(led->cdev.name);
			goto err_unregister_leds;
		}

		led_no++;
	}

	return 0;

err_unregister_leds:
	while (--led_no >= 0) {
		struct input_led *led = &leds->leds[led_no];

		led_classdev_unregister(&led->cdev);
		kfree(led->cdev.name);
	}

	input_close_device(&leds->handle);

err_unregister_handle:
	input_unregister_handle(&leds->handle);

err_free_mem:
	kfree(leds);
	return error;
}

static void input_leds_disconnect(struct input_handle *handle)
{
	struct input_leds *leds = handle->private;
	int i;

	for (i = 0; i < leds->num_leds; i++) {
		struct input_led *led = &leds->leds[i];

		led_classdev_unregister(&led->cdev);
		kfree(led->cdev.name);
	}

	input_close_device(handle);
	input_unregister_handle(handle);

	kfree(leds);
}

static const struct input_device_id input_leds_ids[] = {
	{
		.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
		.evbit = { BIT_MASK(EV_LED) },
	},
	{ },
};
MODULE_DEVICE_TABLE(input, input_leds_ids);

static struct input_handler input_leds_handler = {
	.event = input_leds_event,
	.connect = input_leds_connect,
	.disconnect = input_leds_disconnect,
	.name = "leds",
	.id_table = input_leds_ids,
};

static int __init input_leds_init(void)
{
	return input_register_handler(&input_leds_handler);
}
module_init(input_leds_init);

static void __exit input_leds_exit(void)
{
	input_unregister_handler(&input_leds_handler);
}
module_exit(input_leds_exit);

MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>");
MODULE_DESCRIPTION("Input -> LEDs Bridge");
MODULE_LICENSE("GPL v2");
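/*
 * Example (editor's illustrative sketch, not part of this driver): the
 * userspace side of the same EV_LED state that the handler above mirrors
 * into /sys/class/leds. Writing an EV_LED event to an evdev node injects
 * the LED change into the input device, and input_leds_brightness_get()
 * will then report the LED as lit. The device path is an assumption; pick
 * the event node of the keyboard in question.
 */
#include <fcntl.h>
#include <linux/input.h>
#include <string.h>
#include <unistd.h>

static int example_set_capslock_led(const char *evdev_path, int on)
{
	struct input_event ev;
	int fd;

	fd = open(evdev_path, O_WRONLY);	/* e.g. "/dev/input/event3" (assumed) */
	if (fd < 0)
		return -1;

	memset(&ev, 0, sizeof(ev));
	ev.type = EV_LED;
	ev.code = LED_CAPSL;
	ev.value = on ? 1 : 0;

	if (write(fd, &ev, sizeof(ev)) != sizeof(ev)) {
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}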
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H

#include <linux/refcount.h>
#include <uapi/linux/netfilter/nf_conntrack_common.h>

struct ip_conntrack_stat {
	unsigned int found;
	unsigned int invalid;
	unsigned int insert;
	unsigned int insert_failed;
	unsigned int clash_resolve;
	unsigned int drop;
	unsigned int early_drop;
	unsigned int error;
	unsigned int expect_new;
	unsigned int expect_create;
	unsigned int expect_delete;
	unsigned int search_restart;
	unsigned int chaintoolong;
};

#define NFCT_INFOMASK	7UL
#define NFCT_PTRMASK	~(NFCT_INFOMASK)

struct nf_conntrack {
	refcount_t use;
};

void nf_conntrack_destroy(struct nf_conntrack *nfct);

/* like nf_ct_put, but without module dependency on nf_conntrack */
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
	if (nfct && refcount_dec_and_test(&nfct->use))
		nf_conntrack_destroy(nfct);
}

static inline void nf_conntrack_get(struct nf_conntrack *nfct)
{
	if (nfct)
		refcount_inc(&nfct->use);
}

#endif /* _NF_CONNTRACK_COMMON_H */
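/*
 * Example (editor's illustrative sketch, not part of this header): the
 * intended pairing of the helpers above. Code that caches a conntrack
 * pointer takes its own reference with nf_conntrack_get() and drops it
 * with nf_conntrack_put(), which calls nf_conntrack_destroy() once the
 * last reference is gone. Both helpers are NULL-safe. "struct my_cache"
 * is hypothetical and assumed to start out with nfct == NULL.
 */
struct my_cache {
	struct nf_conntrack *nfct;
};

static inline void my_cache_set(struct my_cache *c, struct nf_conntrack *nfct)
{
	nf_conntrack_get(nfct);		/* take our reference first ... */
	nf_conntrack_put(c->nfct);	/* ... then drop the old one, if any */
	c->nfct = nfct;
}

static inline void my_cache_clear(struct my_cache *c)
{
	nf_conntrack_put(c->nfct);
	c->nfct = NULL;
}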
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I2C message transfer tracepoints
 *
 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM i2c

#if !defined(_TRACE_I2C_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_I2C_H

#include <linux/i2c.h>
#include <linux/tracepoint.h>

/*
 * drivers/i2c/i2c-core-base.c
 */
extern int i2c_transfer_trace_reg(void);
extern void i2c_transfer_trace_unreg(void);

/*
 * __i2c_transfer() write request
 */
TRACE_EVENT_FN(i2c_write,
	TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg,
		 int num),
	TP_ARGS(adap, msg, num),
	TP_STRUCT__entry(
		__field(int,	adapter_nr)
		__field(__u16,	msg_nr)
		__field(__u16,	addr)
		__field(__u16,	flags)
		__field(__u16,	len)
		__dynamic_array(__u8, buf, msg->len)
	),
	TP_fast_assign(
		__entry->adapter_nr = adap->nr;
		__entry->msg_nr = num;
		__entry->addr = msg->addr;
		__entry->flags = msg->flags;
		__entry->len = msg->len;
		memcpy(__get_dynamic_array(buf), msg->buf, msg->len);
	),
	TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]",
		  __entry->adapter_nr,
		  __entry->msg_nr,
		  __entry->addr,
		  __entry->flags,
		  __entry->len,
		  __entry->len, __get_dynamic_array(buf)),
	i2c_transfer_trace_reg,
	i2c_transfer_trace_unreg);

/*
 * __i2c_transfer() read request
 */
TRACE_EVENT_FN(i2c_read,
	TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg,
		 int num),
	TP_ARGS(adap, msg, num),
	TP_STRUCT__entry(
		__field(int,	adapter_nr)
		__field(__u16,	msg_nr)
		__field(__u16,	addr)
		__field(__u16,	flags)
		__field(__u16,	len)
	),
	TP_fast_assign(
		__entry->adapter_nr = adap->nr;
		__entry->msg_nr = num;
		__entry->addr = msg->addr;
		__entry->flags = msg->flags;
		__entry->len = msg->len;
	),
	TP_printk("i2c-%d #%u a=%03x f=%04x l=%u",
		  __entry->adapter_nr,
		  __entry->msg_nr,
		  __entry->addr,
		  __entry->flags,
		  __entry->len),
	i2c_transfer_trace_reg,
	i2c_transfer_trace_unreg);

/*
 * __i2c_transfer() read reply
 */
TRACE_EVENT_FN(i2c_reply,
	TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg,
		 int num),
	TP_ARGS(adap, msg, num),
	TP_STRUCT__entry(
		__field(int,	adapter_nr)
		__field(__u16,	msg_nr)
		__field(__u16,	addr)
		__field(__u16,	flags)
		__field(__u16,	len)
		__dynamic_array(__u8, buf, msg->len)
	),
	TP_fast_assign(
		__entry->adapter_nr = adap->nr;
		__entry->msg_nr = num;
		__entry->addr = msg->addr;
		__entry->flags = msg->flags;
		__entry->len = msg->len;
		memcpy(__get_dynamic_array(buf), msg->buf, msg->len);
	),
	TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]",
		  __entry->adapter_nr,
		  __entry->msg_nr,
		  __entry->addr,
		  __entry->flags,
		  __entry->len,
		  __entry->len, __get_dynamic_array(buf)),
	i2c_transfer_trace_reg,
	i2c_transfer_trace_unreg);

/*
 * __i2c_transfer() result
 */
TRACE_EVENT_FN(i2c_result,
	TP_PROTO(const struct i2c_adapter *adap, int num, int ret),
	TP_ARGS(adap, num, ret),
	TP_STRUCT__entry(
		__field(int,	adapter_nr)
		__field(__u16,	nr_msgs)
		__field(__s16,	ret)
	),
	TP_fast_assign(
		__entry->adapter_nr = adap->nr;
		__entry->nr_msgs = num;
		__entry->ret = ret;
	),
	TP_printk("i2c-%d n=%u ret=%d",
		  __entry->adapter_nr,
		  __entry->nr_msgs,
		  __entry->ret),
	i2c_transfer_trace_reg,
	i2c_transfer_trace_unreg);

#endif /* _TRACE_I2C_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
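/*
 * Example (editor's illustrative sketch, not part of this header): enabling
 * the i2c trace events above from user space through tracefs. The mount
 * point is an assumption (tracefs is typically at /sys/kernel/tracing, or
 * /sys/kernel/debug/tracing on older setups). Once enabled, the rendered
 * events, formatted by the TP_printk() strings above, can be read from the
 * trace or trace_pipe file in the same directory.
 */
#include <stdio.h>

static int example_enable_i2c_tracing(void)
{
	/* Enables i2c_write, i2c_read, i2c_reply and i2c_result at once. */
	FILE *f = fopen("/sys/kernel/tracing/events/i2c/enable", "w");

	if (!f)
		return -1;

	fputs("1\n", f);
	fclose(f);
	return 0;
}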
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The User Datagram Protocol (UDP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() calls
 *		Alan Cox	:	stopped close while in use off icmp
 *					messages. Not a fix but a botch that
 *					for udp at least is 'valid'.
 *		Alan Cox	:	Fixed icmp handling properly
 *		Alan Cox	:	Correct error for oversized datagrams
 *		Alan Cox	:	Tidied select() semantics.
 *		Alan Cox	:	udp_err() fixed properly, also now
 *					select and read wake correctly on errors
 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
 *		Alan Cox	:	UDP can count its memory
 *		Alan Cox	:	send to an unknown connection causes
 *					an ECONNREFUSED off the icmp, but
 *					does NOT close.
 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
 *					bug no longer crashes it.
 *		Fred Van Kempen	:	Net2e support for sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram
 *		Alan Cox	:	Added get/set sockopt support.
 *		Alan Cox	:	Broadcasting without option set returns EACCES.
 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
 *		Alan Cox	:	Use ip_tos and ip_ttl
 *		Alan Cox	:	SNMP Mibs
 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
 *		Matt Dillon	:	UDP length checks.
 *		Alan Cox	:	Smarter af_inet used properly.
 *		Alan Cox	:	Use new kernel side addressing.
 *		Alan Cox	:	Incorrect return on truncated datagram receive.
 *		Arnt Gulbrandsen:	New udp_send and stuff
 *		Alan Cox	:	Cache last socket
 *		Alan Cox	:	Route cache
 *		Jon Peatfield	:	Minor efficiency fix to sendto().
 *		Mike Shaver	:	RFC1122 checks.
 *		Alan Cox	:	Nonblocking error fix.
 *		Willy Konynenberg:	Transparent proxying support.
 *		Mike McLagan	:	Routing by source
 *		David S. Miller	:	New socket lookup architecture.
 *					Last socket cache retained as it
 *					does have a high hit rate.
 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
 *		Andi Kleen	:	Some cleanups, cache destination entry
 *					for connect.
 *		Vitaly E. Lavrov:	Transparent proxy revived after year coma.
 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
 *					return ENOTCONN for unconnected sockets (POSIX)
 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
 *					bound-to-device socket
 *		Hirokazu Takahashi	:	HW checksumming for outgoing UDP
 *						datagrams.
 *		Hirokazu Takahashi	:	sendfile() on UDP works now.
 *		Arnaldo C. Melo	:	convert /proc/net/udp to seq_file
 *		YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *		Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
 *						a single port at the same time.
 *		Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
 *		James Chapman	:	Add L2TP encapsulation type.
*/ #define pr_fmt(fmt) "UDP: " fmt #include <linux/bpf-cgroup.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <linux/memblock.h> #include <linux/highmem.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/timer.h> #include <linux/mm.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/tcp_states.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> #include <net/ip_tunnels.h> #include <net/route.h> #include <net/checksum.h> #include <net/gso.h> #include <net/xfrm.h> #include <trace/events/udp.h> #include <linux/static_key.h> #include <linux/btf_ids.h> #include <trace/events/skb.h> #include <net/busy_poll.h> #include "udp_impl.h" #include <net/sock_reuseport.h> #include <net/addrconf.h> #include <net/udp_tunnel.h> #include <net/gro.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6_stubs.h> #endif struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(udp_memory_allocated); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET) static struct udp_table *udp_get_table_prot(struct sock *sk) { return sk->sk_prot->h.udp_table ? : sock_net(sk)->ipv4.udp_table; } static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, struct sock *sk, unsigned int log) { struct sock *sk2; kuid_t uid = sock_i_uid(sk); sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (bitmap || udp_sk(sk2)->udp_port_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && uid_eq(uid, sock_i_uid(sk2))) { if (!bitmap) return 0; } else { if (!bitmap) return 1; __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap); } } } return 0; } /* * Note: we still hold spinlock of primary hash chain, so no other writer * can insert/delete a socket with local_port == num */ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, struct sock *sk) { struct sock *sk2; kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); udp_portaddr_for_each_entry(sk2, &hslot2->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (udp_sk(sk2)->udp_port_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && uid_eq(uid, sock_i_uid(sk2))) { res = 0; } else { res = 1; } break; } } spin_unlock(&hslot2->lock); return res; } static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) { struct net *net = sock_net(sk); kuid_t uid = sock_i_uid(sk); struct sock *sk2; sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) 
&& sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) && (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && inet_rcv_saddr_equal(sk, sk2, false)) { return reuseport_add_sock(sk, sk2, inet_rcv_saddr_any(sk)); } } return reuseport_alloc(sk, inet_rcv_saddr_any(sk)); } /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr) { struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; struct net *net = sock_net(sk); int error = -EADDRINUSE; if (!snum) { DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); unsigned short first, last; int low, high, remaining; unsigned int rand; inet_sk_get_local_port_range(sk, &low, &high); remaining = (high - low) + 1; rand = get_random_u32(); first = reciprocal_scale(rand, remaining) + low; /* * force rand to be an odd multiple of UDP_HTABLE_SIZE */ rand = (rand | 1) * (udptable->mask + 1); last = first + udptable->mask + 1; do { hslot = udp_hashslot(udptable, net, first); bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, udptable->log); snum = first; /* * Iterate on all possible values of snum for this hash. * Using steps of an odd multiple of UDP_HTABLE_SIZE * give us randomization and full range coverage. */ do { if (low <= snum && snum <= high && !test_bit(snum >> udptable->log, bitmap) && !inet_is_local_reserved_port(net, snum)) goto found; snum += rand; } while (snum != first); spin_unlock_bh(&hslot->lock); cond_resched(); } while (++first != last); goto fail; } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); if (hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; slot2 &= udptable->mask; hash2_nulladdr &= udptable->mask; hslot2 = udp_hashslot2(udptable, slot2); if (hslot->count < hslot2->count) goto scan_primary_hash; exist = udp_lib_lport_inuse2(net, snum, hslot2, sk); if (!exist && (hash2_nulladdr != slot2)) { hslot2 = udp_hashslot2(udptable, hash2_nulladdr); exist = udp_lib_lport_inuse2(net, snum, hslot2, sk); } if (exist) goto fail_unlock; else goto found; } scan_primary_hash: if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0)) goto fail_unlock; } found: inet_sk(sk)->inet_num = snum; udp_sk(sk)->udp_port_hash = snum; udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { if (sk->sk_reuseport && udp_reuseport_add_sock(sk, hslot)) { inet_sk(sk)->inet_num = 0; udp_sk(sk)->udp_port_hash = 0; udp_sk(sk)->udp_portaddr_hash ^= snum; goto fail_unlock; } sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, &hslot2->head); else hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } sock_set_flag(sk, SOCK_RCU_FREE); error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); fail: return error; } EXPORT_SYMBOL(udp_lib_get_port); int udp_v4_get_port(struct sock *sk, unsigned short snum) 
{ unsigned int hash2_nulladdr = ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; return udp_lib_get_port(sk, snum, hash2_nulladdr); } static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, int dif, int sdif) { int score; struct inet_sock *inet; bool dev_match; if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || ipv6_only_sock(sk)) return -1; if (sk->sk_rcv_saddr != daddr) return -1; score = (sk->sk_family == PF_INET) ? 2 : 1; inet = inet_sk(sk); if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; score += 4; } dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); if (!dev_match) return -1; if (sk->sk_bound_dev_if) score += 4; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } INDIRECT_CALLABLE_SCOPE u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { static u32 udp_ehash_secret __read_mostly; net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; int score, badness; result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; if (sk->sk_state == TCP_ESTABLISHED) { result = sk; continue; } result = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, udp_ehashfn); if (!result) { result = sk; continue; } /* Fall back to scoring if group has connections */ if (!reuseport_has_conns(sk)) return result; /* Reuseport logic returned an error, keep original score. */ if (IS_ERR(result)) continue; badness = compute_score(result, net, saddr, sport, daddr, hnum, dif, sdif); } } return result; } /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. 
-DaveM */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; struct sock *result, *sk; hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; /* Lookup connected or non-wildcard socket */ result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) goto done; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && udptable == net->ipv4.udp_table) { sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp_ehashfn); if (sk) { result = sk; goto done; } } /* Got non-wildcard socket or error on first lookup */ if (result) goto done; /* Lookup wildcard sockets */ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif, hslot2, skb); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(__udp4_lib_lookup); static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { const struct iphdr *iph = ip_hdr(skb); return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), inet_sdif(skb), udptable, skb); } struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct iphdr *iph = ip_hdr(skb); struct net *net = dev_net(skb->dev); int iif, sdif; inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, iph->daddr, dport, iif, sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). * Does increment socket refcount. */ #if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4) struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { struct sock *sk; sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, net->ipv4.udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(udp4_lib_lookup); #endif static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif, unsigned short hnum) { const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || (inet->inet_daddr && inet->inet_daddr != rmt_addr) || (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(sk) || !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return false; if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) return false; return true; } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); } EXPORT_SYMBOL(udp_encap_enable); void udp_encap_disable(void) { static_branch_dec(&udp_encap_needed_key); } EXPORT_SYMBOL(udp_encap_disable); /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. 
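 *
 * Illustrative sketch (not part of this file) of how a tunnel
 * implementation is assumed to plug into the loop below: it registers a
 * struct ip_tunnel_encap_ops (from <net/ip_tunnels.h>, already included
 * above) whose ->err_handler has the signature used here and returns 0
 * once it has matched and consumed the ICMP error:
 *
 *	static int my_encap_err(struct sk_buff *skb, u32 info)
 *	{
 *		// look up tunnel state from the inner headers;
 *		// return 0 if handled, -ENOENT if not ours
 *		return -ENOENT;
 *	}
 *
 *	static const struct ip_tunnel_encap_ops my_encap_ops = {
 *		.err_handler	= my_encap_err,
 *	};
 *
 *	// registration into the iptun_encaps[] slots walked below;
 *	// the helper name and slot constant follow the FOU example
 *	ip_tunnel_encap_add_ops(&my_encap_ops, TUNNEL_ENCAP_FOU);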
*/ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) { int i; for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, u32 info); const struct ip_tunnel_encap_ops *encap; encap = rcu_dereference(iptun_encaps[i]); if (!encap) continue; handler = encap->err_handler; if (handler && !handler(skb, info)) return 0; } return -ENOENT; } /* Try to match ICMP errors to UDP tunnels by looking up a socket without * reversing source and destination port: this will match tunnels that force the * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that * lwtunnels might actually break this assumption by being configured with * different destination ports on endpoints, in this case we won't be able to * trace ICMP messages back to them. * * If this doesn't match any socket, probe tunnels with arbitrary destination * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port * we've sent packets to won't necessarily match the local destination port. * * Then ask the tunnel implementation to match the error against a valid * association. * * Return an error if we can't find a match, the socket if we need further * processing, zero otherwise. */ static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, u32 info) { int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); /* Network header needs to point to the outer IPv4 header inside ICMP */ skb_reset_network_header(skb); /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, iph->ihl << 2); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (lookup && lookup(sk, skb)) sk = NULL; goto out; } sk = __udp4_lib_lookup(net, iph->daddr, uh->source, iph->saddr, uh->dest, skb->dev->ifindex, 0, udptable, NULL); if (sk) { up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } out: if (!sk) sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); skb_set_transport_header(skb, transport_offset); skb_set_network_header(skb, network_offset); return sk; } /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should * be closed and the error returned to the user. If err > 0 * it's just the icmp type << 8 | icmp code. * Header points to the ip header of the error packet. We move * on past this. Then (as it used to claim before adjustment) * header points to the first 8 bytes of the udp header. We need * to find the appropriate port. 
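 *
 * Worked example: a connected UDP socket triggers an ICMP "destination
 * unreachable, port unreachable" (type 3, code 3).  The code below maps
 * that through icmp_err_convert[ICMP_PORT_UNREACH] to a hard
 * ECONNREFUSED, which is queued via ip_icmp_error() if IP_RECVERR is
 * set and reported through sk->sk_err either way.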
*/ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; const struct iphdr *iph = (const struct iphdr *)skb->data; struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; bool tunnel = false; struct sock *sk; int harderr; int err; struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, info); if (!sk) return 0; } else sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return PTR_ERR(sk); } tunnel = true; } err = 0; harderr = 0; inet = inet_sk(sk); switch (type) { default: case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; case ICMP_SOURCE_QUENCH: goto out; case ICMP_PARAMETERPROB: err = EPROTO; harderr = 1; break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; } goto out; } err = EHOSTUNREACH; if (code <= NR_ICMP_UNREACH) { harderr = icmp_err_convert[code].fatal; err = icmp_err_convert[code].errno; } break; case ICMP_REDIRECT: ipv4_sk_redirect(skb, sk); goto out; } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ if (tunnel) { /* ...not for tunnels though: we don't have a sending socket */ if (udp_sk(sk)->encap_err_rcv) udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); goto out; } if (!inet_test_bit(RECVERR, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); sk->sk_err = err; sk_error_report(sk); out: return 0; } int udp_err(struct sk_buff *skb, u32 info) { return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table); } /* * Throw away all pending data and cancel the corking. Socket is locked. */ void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); if (up->pending) { up->len = 0; up->pending = 0; ip_flush_pending_frames(sk); } } EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum - handle outgoing HW checksumming * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) * @src: source IP address * @dst: destination IP address */ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { struct udphdr *uh = udp_hdr(skb); int offset = skb_transport_offset(skb); int len = skb->len - offset; int hlen = len; __wsum csum = 0; if (!skb_has_frag_list(skb)) { /* * Only one fragment on the socket. 
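 * Hand the rest to the device: point csum_start/csum_offset at the UDP
 * checksum field and seed uh->check with the inverted pseudo-header
 * checksum, so the NIC folds the payload in (CHECKSUM_PARTIAL offload).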
*/ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { struct sk_buff *frags; /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ skb_walk_frags(skb, frags) { csum = csum_add(csum, frags->csum); hlen -= frags->len; } csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } EXPORT_SYMBOL_GPL(udp4_hwcsum); /* Function to set UDP checksum for an IPv4 UDP packet. This is intended * for the simple case like when setting the checksum for a UDP tunnel. */ void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len) { struct udphdr *uh = udp_hdr(skb); if (nocheck) { uh->check = 0; } else if (skb_is_gso(skb)) { uh->check = ~udp_v4_check(len, saddr, daddr, 0); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { uh->check = 0; uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v4_check(len, saddr, daddr, 0); } } EXPORT_SYMBOL(udp_set_csum); static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, struct inet_cork *cork) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct udphdr *uh; int err; int is_udplite = IS_UDPLITE(sk); int offset = skb_transport_offset(skb); int len = skb->len - offset; int datalen = len - sizeof(*uh); __wsum csum = 0; /* * Create a UDP header */ uh = udp_hdr(skb); uh->source = inet->inet_sport; uh->dest = fl4->fl4_dport; uh->len = htons(len); uh->check = 0; if (cork->gso_size) { const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); if (hlen + cork->gso_size > cork->fragsize) { kfree_skb(skb); return -EINVAL; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } if (sk->sk_no_check_tx) { kfree_skb(skb); return -EINVAL; } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } if (datalen > cork->gso_size) { skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, cork->gso_size); } goto csum_partial; } if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); else if (sk->sk_no_check_tx) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: udp4_hwcsum(skb, fl4->saddr, fl4->daddr); goto send; } else csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, sk->sk_protocol, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; } } else UDP_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } /* * Push out all pending data as one UDP datagram. Socket is locked. 
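 *
 * Illustrative userspace sketch (not part of this file; buffers and
 * sizes are placeholders) of the corking sequence that ends up here:
 *
 *	int on = 1, off = 0;
 *
 *	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
 *	send(fd, part1, len1, 0);
 *	send(fd, part2, len2, 0);
 *	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));
 *
 * Clearing UDP_CORK (or a final sendmsg() without MSG_MORE) pushes the
 * accumulated parts out as a single datagram.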
*/ int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct flowi4 *fl4 = &inet->cork.fl.u.ip4; struct sk_buff *skb; int err = 0; skb = ip_finish_skb(sk, fl4); if (!skb) goto out; err = udp_send_skb(skb, fl4, &inet->cork.base); out: up->len = 0; up->pending = 0; return err; } EXPORT_SYMBOL(udp_push_pending_frames); static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) { switch (cmsg->cmsg_type) { case UDP_SEGMENT: if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16))) return -EINVAL; *gso_size = *(__u16 *)CMSG_DATA(cmsg); return 0; default: return -EINVAL; } } int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) { struct cmsghdr *cmsg; bool need_ip = false; int err; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_UDP) { need_ip = true; continue; } err = __udp_cmsg_send(cmsg, gso_size); if (err) return err; } return need_ip; } EXPORT_SYMBOL_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); struct flowi4 fl4_stack; struct flowi4 *fl4; int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; int connected = 0; __be32 daddr, faddr, saddr; u8 tos, scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; int uc_index; if (len > 0xFFFF) return -EMSGSIZE; /* * Check the flags. */ if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ return -EOPNOTSUPP; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; fl4 = &inet->cork.fl.u.ip4; if (up->pending) { /* * There are pending frames. * The socket lock must be held while it's corked. */ lock_sock(sk); if (likely(up->pending)) { if (unlikely(up->pending != AF_INET)) { release_sock(sk); return -EINVAL; } goto do_append_data; } release_sock(sk); } ulen += sizeof(struct udphdr); /* * Get and verify the address. */ if (usin) { if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) { if (usin->sin_family != AF_UNSPEC) return -EAFNOSUPPORT; } daddr = usin->sin_addr.s_addr; dport = usin->sin_port; if (dport == 0) return -EINVAL; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = inet->inet_daddr; dport = inet->inet_dport; /* Open fast path for connected socket. Route will not be used, if at least one option is set. */ connected = 1; } ipcm_init_sk(&ipc, inet); ipc.gso_size = READ_ONCE(up->gso_size); if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); if (err > 0) err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, (struct sockaddr *)usin, &msg->msg_namelen, &ipc.addr); if (err) goto out_free; if (usin) { if (usin->sin_port == 0) { /* BPF program set invalid port. Reject it. 
*/ err = -EINVAL; goto out_free; } daddr = usin->sin_addr.s_addr; dport = usin->sin_port; } } saddr = ipc.addr; ipc.addr = faddr = daddr; if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) { err = -EINVAL; goto out_free; } faddr = ipc.opt->opt.faddr; connected = 0; } tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (scope == RT_SCOPE_LINK) connected = 0; uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) saddr = READ_ONCE(inet->mc_addr); connected = 0; } else if (!ipc.oif) { ipc.oif = uc_index; } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast and * uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. */ if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), uc_index)) { ipc.oif = uc_index; } } if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); if (!rt) { struct net *net = sock_net(sk); __u8 flow_flags = inet_sk_flowi_flags(sk); fl4 = &fl4_stack; flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } err = -EACCES; if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) goto out; if (connected) sk_dst_set(sk, dst_clone(&rt->dst)); } if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: saddr = fl4->saddr; if (!ipc.addr) daddr = ipc.addr = fl4->daddr; /* Lockless fast path for the non-corking case. */ if (!corkreq) { struct inet_cork cork; skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, &cork, msg->msg_flags); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_send_skb(skb, fl4, &cork); goto out; } lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ /* ... which is an evident application bug. --ANK */ release_sock(sk); net_dbg_ratelimited("socket already corked\n"); err = -EINVAL; goto out; } /* * Now cork the socket to pend data. */ fl4 = &inet->cork.fl.u.ip4; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: up->len += ulen; err = ip_append_data(sk, fl4, getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); else if (!corkreq) err = udp_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; release_sock(sk); out: ip_rt_put(rt); out_free: if (free) kfree(ipc.opt); if (!err) return len; /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise * we don't have a good statistic (IpOutDiscards but it can be too many * things). We could add another new stat but at least for now that * seems like overkill. 
*/ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); } return err; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(&rt->dst, &fl4->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; goto out; } EXPORT_SYMBOL(udp_sendmsg); void udp_splice_eof(struct socket *sock) { struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); if (up->pending && !udp_test_bit(CORK, sk)) udp_push_pending_frames(sk); release_sock(sk); } EXPORT_SYMBOL_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 /* all head states (dst, sk, nf conntrack) except skb extensions are * cleared by udp_rcv(). * * We need to preserve secpath, if present, to eventually process * IP_CMSG_PASSSEC at recvmsg() time. * * Other extensions can be cleared. */ static bool udp_try_make_stateless(struct sk_buff *skb) { if (!skb_has_extensions(skb)) return true; if (!secpath_exists(skb)) { skb_ext_reset(skb); return true; } return false; } static void udp_set_dev_scratch(struct sk_buff *skb) { struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); scratch->_tsize_state = skb->truesize; #if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); #endif if (udp_try_make_stateless(skb)) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) { /* We come here after udp_lib_checksum_complete() returned 0. * This means that __skb_checksum_complete() might have * set skb->csum_valid to 1. * On 64bit platforms, we can set csum_unnecessary * to true, but only if the skb is not shared. */ #if BITS_PER_LONG == 64 if (!skb_shared(skb)) udp_skb_scratch(skb)->csum_unnecessary = true; #endif } static int udp_skb_truesize(struct sk_buff *skb) { return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } static bool udp_skb_has_head_state(struct sk_buff *skb) { return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, bool rx_queue_lock_held) { struct udp_sock *up = udp_sk(sk); struct sk_buff_head *sk_queue; int amt; if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; if (size < READ_ONCE(up->forward_threshold) && !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit; } up->forward_deficit = 0; /* acquire the sk_receive_queue for fwd allocated memory scheduling, * if the called don't held it already */ sk_queue = &sk->sk_receive_queue; if (!rx_queue_lock_held) spin_lock(&sk_queue->lock); sk_forward_alloc_add(sk, size); amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); sk_forward_alloc_add(sk, -amt); if (amt) __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); atomic_sub(size, &sk->sk_rmem_alloc); /* this can save us from acquiring the rx queue lock on next receive */ skb_queue_splice_tail_init(sk_queue, &up->reader_queue); if (!rx_queue_lock_held) spin_unlock(&sk_queue->lock); } /* Note: called with reader_queue.lock held. * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch * This avoids a cache line miss while receive_queue lock is held. * Look at __udp_enqueue_schedule_skb() to find where this copy is done. 
*/ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } EXPORT_SYMBOL(udp_skb_destructor); /* as above, but the caller held the rx queue lock, too */ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) { prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, true); } /* Idea of busylocks is to let producers grab an extra spinlock * to relieve pressure on the receive_queue spinlock shared by consumer. * Under flood, this means that only one producer can be in line * trying to acquire the receive_queue spinlock. * These busylock can be allocated on a per cpu manner, instead of a * per socket one (that would consume a cache line per socket) */ static int udp_busylocks_log __read_mostly; static spinlock_t *udp_busylocks __read_mostly; static spinlock_t *busylock_acquire(void *ptr) { spinlock_t *busy; busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log); spin_lock(busy); return busy; } static void busylock_release(spinlock_t *busy) { if (busy) spin_unlock(busy); } static int udp_rmem_schedule(struct sock *sk, int size) { int delta; delta = size - sk->sk_forward_alloc; if (delta > 0 && !__sk_mem_schedule(sk, delta, SK_MEM_RECV)) return -ENOBUFS; return 0; } int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; int rmem, err = -ENOMEM; spinlock_t *busy = NULL; int size; /* try to avoid the costly atomic add/sub pair when the receive * queue is full; always allow at least a packet */ rmem = atomic_read(&sk->sk_rmem_alloc); if (rmem > sk->sk_rcvbuf) goto drop; /* Under mem pressure, it might be helpful to help udp_recvmsg() * having linear skbs : * - Reduce memory overhead and thus increase receive queue capacity * - Less cache line misses at copyout() time * - Less work at consume_skb() (less alien page frag freeing) */ if (rmem > (sk->sk_rcvbuf >> 1)) { skb_condense(skb); busy = busylock_acquire(sk); } size = skb->truesize; udp_set_dev_scratch(skb); /* we drop only if the receive buf is full and the receive * queue contains some other skb */ rmem = atomic_add_return(size, &sk->sk_rmem_alloc); if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) goto uncharge_drop; spin_lock(&list->lock); err = udp_rmem_schedule(sk, size); if (err) { spin_unlock(&list->lock); goto uncharge_drop; } sk_forward_alloc_add(sk, -size); /* no need to setup a destructor, we will explicitly release the * forward allocated memory on dequeue */ sock_skb_set_dropcount(sk, skb); __skb_queue_tail(list, skb); spin_unlock(&list->lock); if (!sock_flag(sk, SOCK_DEAD)) INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk); busylock_release(busy); return 0; uncharge_drop: atomic_sub(skb->truesize, &sk->sk_rmem_alloc); drop: atomic_inc(&sk->sk_drops); busylock_release(busy); return err; } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); void udp_destruct_common(struct sock *sk) { /* reclaim completely the forward allocated memory */ struct udp_sock *up = udp_sk(sk); unsigned int total = 0; struct sk_buff *skb; skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue); while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) { total += skb->truesize; kfree_skb(skb); } udp_rmem_release(sk, total, 0, true); } EXPORT_SYMBOL_GPL(udp_destruct_common); static void udp_destruct_sock(struct sock *sk) { udp_destruct_common(sk); inet_sock_destruct(sk); } int udp_init_sock(struct sock *sk) { udp_lib_init_sock(sk); sk->sk_destruct = 
udp_destruct_sock; set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); return 0; } void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { bool slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); unlock_sock_fast(sk, slow); } if (!skb_unref(skb)) return; /* In the more common cases we cleared the head states previously, * see __udp_queue_rcv_skb(). */ if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); __consume_stateless_skb(skb); } EXPORT_SYMBOL_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, int *total) { struct sk_buff *skb; while ((skb = skb_peek(rcvq)) != NULL) { if (udp_lib_checksum_complete(skb)) { __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, IS_UDPLITE(sk)); __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); *total += skb->truesize; kfree_skb(skb); } else { udp_skb_csum_unnecessary_set(skb); break; } } return skb; } /** * first_packet_length - return length of first packet in receive queue * @sk: socket * * Drops all bad checksum frames, until a valid one is found. * Returns the length of found skb, or -1 if none is found. */ static int first_packet_length(struct sock *sk) { struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue; struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff *skb; int total = 0; int res; spin_lock_bh(&rcvq->lock); skb = __first_packet_length(sk, rcvq, &total); if (!skb && !skb_queue_empty_lockless(sk_queue)) { spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, rcvq); spin_unlock(&sk_queue->lock); skb = __first_packet_length(sk, rcvq, &total); } res = skb ? skb->len : -1; if (total) udp_rmem_release(sk, total, 1, false); spin_unlock_bh(&rcvq->lock); return res; } /* * IOCTL requests applicable to the UDP protocol */ int udp_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); return 0; } case SIOCINQ: { *karg = max_t(int, 0, first_packet_length(sk)); return 0; } default: return -ENOIOCTLCMD; } return 0; } EXPORT_SYMBOL(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err) { struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff_head *queue; struct sk_buff *last; long timeo; int error; queue = &udp_sk(sk)->reader_queue; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; error = sock_error(sk); if (error) break; error = -EAGAIN; do { spin_lock_bh(&queue->lock); skb = __skb_try_recv_from_queue(sk, queue, flags, off, err, &last); if (skb) { if (!(flags & MSG_PEEK)) udp_skb_destructor(sk, skb); spin_unlock_bh(&queue->lock); return skb; } if (skb_queue_empty_lockless(sk_queue)) { spin_unlock_bh(&queue->lock); goto busy_check; } /* refill the reader queue and walk it again * keep both queues locked to avoid re-acquiring * the sk_receive_queue lock if fwd memory scheduling * is needed. 
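 * The splice below moves everything queued on sk_receive_queue over to
 * the private reader_queue in one go, so the shared lock is touched
 * once per batch instead of once per skb.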
*/ spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, queue); skb = __skb_try_recv_from_queue(sk, queue, flags, off, err, &last); if (skb && !(flags & MSG_PEEK)) udp_skb_dtor_locked(sk, skb); spin_unlock(&sk_queue->lock); spin_unlock_bh(&queue->lock); if (skb) return skb; busy_check: if (!sk_can_busy_loop(sk)) break; sk_busy_loop(sk, flags & MSG_DONTWAIT); } while (!skb_queue_empty_lockless(sk_queue)); /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, &error, &timeo, (struct sk_buff *)sk_queue)); *err = error; return NULL; } EXPORT_SYMBOL(__skb_recv_udp); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct sk_buff *skb; int err; try_again: skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (!skb) return err; if (udp_lib_checksum_complete(skb)) { int is_udplite = IS_UDPLITE(sk); struct net *net = sock_net(sk); __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); goto try_again; } WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); return recv_actor(sk, skb); } EXPORT_SYMBOL(udp_read_skb); /* * This should be easy, if there is something there we * return it, otherwise we block. */ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len, addr_len); try_again: off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; ulen = udp_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial * coverage checksum (UDP-Lite), do it before the copy. */ if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || udp_skb_csum_unnecessary(skb)) { if (udp_skb_is_linear(skb)) err = copy_linear_skb(skb, copied, off, &msg->msg_iter); else err = skb_copy_datagram_msg(skb, off, msg, copied); } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } if (unlikely(err)) { if (!peeking) { atomic_inc(&sk->sk_drops); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } kfree_skb(skb); return err; } if (!peeking) UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { sin->sin_family = AF_INET; sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, (struct sockaddr *)sin, addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (inet_cmsg_flags(inet)) ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) err = ulen; skb_consume_udp(sk, skb, peeking ? 
-err : err); return err; csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { /* This check is replicated from __ip4_datagram_connect() and * intended to prevent BPF program called below from accessing bytes * that are out of the bound specified by user in addr_len. */ if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); /* * 1003.1g - break association. */ sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { inet_reset_saddr(sk); if (sk->sk_prot->rehash && (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) sk->sk_prot->rehash(sk); } if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); inet->inet_sport = 0; } sk_dst_reset(sk); return 0; } EXPORT_SYMBOL(__udp_disconnect); int udp_disconnect(struct sock *sk, int flags) { lock_sock(sk); __udp_disconnect(sk, flags); release_sock(sk); return 0; } EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; hslot = udp_hashslot(udptable, sock_net(sk), udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_lock(&hslot2->lock); hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); } spin_unlock_bh(&hslot->lock); } } EXPORT_SYMBOL(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash */ void udp_lib_rehash(struct sock *sk, u16 newhash) { if (sk_hashed(sk)) { struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { hslot = udp_hashslot(udptable, sock_net(sk), udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); if (hslot2 != nhslot2) { spin_lock(&hslot2->lock); hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); spin_lock(&nhslot2->lock); hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &nhslot2->head); nhslot2->count++; spin_unlock(&nhslot2->lock); } spin_unlock_bh(&hslot->lock); } } } EXPORT_SYMBOL(udp_lib_rehash); void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); } static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; if 
(inet_sk(sk)->inet_daddr) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } else { sk_mark_napi_id_once(sk, skb); } rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); int drop_reason; /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); drop_reason = SKB_DROP_REASON_PROTO_MEM; } UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb_reason(skb, drop_reason); trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } return 0; } /* returns: * -1: error * 0: success * >0: "udp encap" protocol resubmission * * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); /* * Charge it to the socket, dropping if the queue is full. */ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; } nf_reset_ct(skb); if (static_branch_unlikely(&udp_encap_needed_key) && READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* * This is an encapsulation socket so pass the skb to * the socket's udp_encap_rcv() hook. Otherwise, just * fall through and pass this up the UDP socket. * up->encap_rcv() returns the following value: * =0 if skb was successfully passed to the encap * handler or was discarded by it. * >0 if skb should be passed on to UDP. * <0 if skb should be resubmitted as proto -N */ /* if we're overly short, let UDP handle it */ encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ if (udp_lib_checksum_complete(skb)) goto csum_error; ret = encap_rcv(sk, skb); if (ret <= 0) { __UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } } /* FALLTHROUGH -- it's a UDP Packet */ } /* * UDP-Lite specific tests, ignored on UDP sockets */ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { u16 pcrlen = READ_ONCE(up->pcrlen); /* * MIB statistics other than incrementing the error count are * disabled for the following two types of errors: these depend * on the application settings, not on the functioning of the * protocol stack as such. * * RFC 3828 here recommends (sec 3.3): "There should also be a * way ... to ... at least let the receiving application block * delivery of packets with coverage values less than a value * provided by the application." */ if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } /* The next case involves violating the min. coverage requested * by the receiver. This is subtle: if receiver wants x and x is * greater than the buffersize/MTU then receiver will complain * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. 
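 *
 * Example: a receiver that set UDPLITE_RECV_CSCOV to 20 has
 * pcrlen == 20; a datagram whose sender only covered 12 bytes
 * (cscov == 12) is dropped by the check below, while full coverage
 * or cscov >= 20 is delivered.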
*/ if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; } udp_csum_pull_header(skb); ipv4_pktinfo_prepare(sk, skb); return __udp_queue_rcv_skb(sk, skb); csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb_reason(skb, drop_reason); return -1; } static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff *next, *segs; int ret; if (likely(!udp_unexpected_gso(sk, skb))) return udp_queue_rcv_one_skb(sk, skb); BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); udp_post_segment_fix_csum(skb); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); } return 0; } /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; if (dst_hold_safe(dst)) { old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); dst_release(old); return old != dst; } return false; } EXPORT_SYMBOL(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. * * Note: called only from the BH handler context. */ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, struct udp_table *udptable, int proto) { struct sock *sk, *first = NULL; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); unsigned int offset = offsetof(typeof(*sk), sk_node); int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; if (use_hash2) { hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udptable->mask; hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, uh->source, saddr, dif, sdif, hnum)) continue; if (!first) { first = sk; continue; } nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { atomic_inc(&sk->sk_drops); __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); __UDP_INC_STATS(net, UDP_MIB_INERRORS, IS_UDPLITE(sk)); continue; } if (udp_queue_rcv_skb(sk, nskb) > 0) consume_skb(nskb); } /* Also lookup *:port if we are using hash2 and haven't done so yet. */ if (use_hash2 && hash2 != hash2_any) { hash2 = hash2_any; goto start_lookup; } if (first) { if (udp_queue_rcv_skb(first, skb) > 0) consume_skb(skb); } else { kfree_skb(skb); __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, proto == IPPROTO_UDPLITE); } return 0; } /* Initialize UDP checksum. If exited with zero value (success), * CHECKSUM_UNNECESSARY means, that no more checks are required. 
* Otherwise, csum completion requires checksumming packet body, * including udp header and folding it to skb->csum. */ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) { int err; UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; if (proto == IPPROTO_UDPLITE) { err = udplite_checksum_init(skb, uh); if (err) return err; if (UDP_SKB_CB(skb)->partial_cov) { skb->csum = inet_compute_pseudo(skb, proto); return 0; } } /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, inet_compute_pseudo); if (err) return err; if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { /* If SW calculated the value, we know it's bad */ if (skb->csum_complete_sw) return 1; /* HW says the value is bad. Let's validate that. * skb->csum is no longer the full packet checksum, * so don't treat it as such. */ skb_checksum_complete_unset(skb); } return 0; } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and * return code conversion for ip layer consumption */ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, struct udphdr *uh) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ if (ret > 0) return -ret; return 0; } /* * All we need to do is get the socket, and then do a checksum. */ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; struct udphdr *uh; unsigned short ulen; struct rtable *rt = skb_rtable(skb); __be32 saddr, daddr; struct net *net = dev_net(skb->dev); bool refcounted; int drop_reason; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* * Validate the packet. */ if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto drop; /* No space for header. */ uh = udp_hdr(skb); ulen = ntohs(uh->len); saddr = ip_hdr(skb)->saddr; daddr = ip_hdr(skb)->daddr; if (ulen > skb->len) goto short_packet; if (proto == IPPROTO_UDP) { /* UDP validates ulen. */ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) goto short_packet; uh = udp_hdr(skb); } if (udp4_csum_init(skb, uh, proto)) goto csum_error; sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest, &refcounted, udp_ehashfn); if (IS_ERR(sk)) goto no_sk; if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); if (refcounted) sock_put(sk); return ret; } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) return udp_unicast_rcv_skb(sk, skb, uh); no_sk: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; nf_reset_ct(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_lib_checksum_complete(skb)) goto csum_error; drop_reason = SKB_DROP_REASON_NO_SOCKET; __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. 
*/ kfree_skb_reason(skb, drop_reason); return 0; short_packet: drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), ulen, skb->len, &daddr, ntohs(uh->dest)); goto drop; csum_error: /* * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ drop_reason = SKB_DROP_REASON_UDP_CSUM; net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb_reason(skb, drop_reason); return 0; } /* We can only early demux multicast if there is a single matching socket. * If more than one socket found returns NULL */ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif) { struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); struct sock *sk, *result; struct udp_hslot *hslot; unsigned int slot; slot = udp_hashfn(net, hnum, udptable->mask); hslot = &udptable->hash[slot]; /* Do not bother scanning a too big list */ if (hslot->count > 10) return NULL; result = NULL; sk_for_each_rcu(sk, &hslot->head) { if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, rmt_port, rmt_addr, dif, sdif, hnum)) { if (result) return NULL; result = sk; } } return result; } /* For unicast we should only early demux connected sockets or we can * break forwarding setups. The chains here can be long so only check * if the first socket is an exact match and if not move on. 
*/ static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif) { struct udp_table *udptable = net->ipv4.udp_table; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); unsigned short hnum = ntohs(loc_port); unsigned int hash2, slot2; struct udp_hslot *hslot2; __portpair ports; struct sock *sk; hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; ports = INET_COMBINED_PORTS(rmt_port, hnum); udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (inet_match(net, sk, acookie, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; } return NULL; } int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct in_device *in_dev = NULL; const struct iphdr *iph; const struct udphdr *uh; struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); int ours; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return 0; iph = ip_hdr(skb); uh = udp_hdr(skb); if (skb->pkt_type == PACKET_MULTICAST) { in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, iph->protocol); if (!ours) return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif, sdif); } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif, sdif); } if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return 0; skb->sk = sk; skb->destructor = sock_efree; dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); if (dst) { u32 itag = 0; /* set noref for now. 
* any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); /* for unconnected multicast sockets we need to validate * the source on each packet */ if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, iph->saddr, iph->tos & IPTOS_RT_MASK, skb->dev, in_dev, &itag); } return 0; } int udp_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); /* protects from races with udp_abort() */ sock_set_flag(sk, SOCK_DEAD); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); if (static_branch_unlikely(&udp_encap_needed_key)) { if (up->encap_type) { void (*encap_destroy)(struct sock *sk); encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); } } static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, struct sock *sk) { #ifdef CONFIG_XFRM if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { if (family == AF_INET) WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv); } #endif } /* * Socket option code for UDP */ int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); int val, valbool; int err = 0; int is_udplite = IS_UDPLITE(sk); if (level == SOL_SOCKET) { err = sk_setsockopt(sk, level, optname, optval, optlen); if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) { sockopt_lock_sock(sk); /* paired with READ_ONCE in udp_rmem_release() */ WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2); sockopt_release_sock(sk); } return err; } if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; valbool = val ? 1 : 0; switch (optname) { case UDP_CORK: if (val != 0) { udp_set_bit(CORK, sk); } else { udp_clear_bit(CORK, sk); lock_sock(sk); push_pending_frames(sk); release_sock(sk); } break; case UDP_ENCAP: switch (val) { case 0: #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); fallthrough; case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) WRITE_ONCE(up->encap_rcv, ipv6_stub->xfrm6_udp_encap_rcv); else #endif WRITE_ONCE(up->encap_rcv, xfrm4_udp_encap_rcv); #endif fallthrough; case UDP_ENCAP_L2TPINUDP: WRITE_ONCE(up->encap_type, val); udp_tunnel_encap_enable(sk); break; default: err = -ENOPROTOOPT; break; } break; case UDP_NO_CHECK6_TX: udp_set_no_check6_tx(sk, valbool); break; case UDP_NO_CHECK6_RX: udp_set_no_check6_rx(sk, valbool); break; case UDP_SEGMENT: if (val < 0 || val > USHRT_MAX) return -EINVAL; WRITE_ONCE(up->gso_size, val); break; case UDP_GRO: /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk); break; /* * UDP-Lite's partial checksum coverage (RFC 3828). */ /* The sender sets actual checksum coverage length via this option. * The case coverage > packet length is handled by send module. 
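 *
 * Illustrative userspace sketch (not part of this file): a UDP-Lite
 * sender that wants the checksum to cover the 8-byte UDP-Lite header
 * plus a 12-byte application header would do
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
 *	int cscov = 20;
 *
 *	setsockopt(fd, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &cscov, sizeof(cscov));
 *
 * (SOL_UDPLITE equals IPPROTO_UDPLITE; nonzero values below 8 are
 * raised to the minimum of 8 by the code below.)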
*/ case UDPLITE_SEND_CSCOV: if (!is_udplite) /* Disable the option on UDP sockets */ return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; WRITE_ONCE(up->pcslen, val); udp_set_bit(UDPLITE_SEND_CC, sk); break; /* The receiver specifies a minimum checksum coverage value. To make * sense, this should be set to at least 8 (as done below). If zero is * used, this again means full checksum coverage. */ case UDPLITE_RECV_CSCOV: if (!is_udplite) /* Disable the option on UDP sockets */ return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; WRITE_ONCE(up->pcrlen, val); udp_set_bit(UDPLITE_RECV_CC, sk); break; default: err = -ENOPROTOOPT; break; } return err; } EXPORT_SYMBOL(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); return ip_setsockopt(sk, level, optname, optval, optlen); } int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct udp_sock *up = udp_sk(sk); int val, len; if (get_user(len, optlen)) return -EFAULT; len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; switch (optname) { case UDP_CORK: val = udp_test_bit(CORK, sk); break; case UDP_ENCAP: val = READ_ONCE(up->encap_type); break; case UDP_NO_CHECK6_TX: val = udp_get_no_check6_tx(sk); break; case UDP_NO_CHECK6_RX: val = udp_get_no_check6_rx(sk); break; case UDP_SEGMENT: val = READ_ONCE(up->gso_size); break; case UDP_GRO: val = udp_test_bit(GRO_ENABLED, sk); break; /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: val = READ_ONCE(up->pcslen); break; case UDPLITE_RECV_CSCOV: val = READ_ONCE(up->pcrlen); break; default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } EXPORT_SYMBOL(udp_lib_getsockopt); int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ip_getsockopt(sk, level, optname, optval, optlen); } /** * udp_poll - wait for a UDP event. * @file: - file struct * @sock: - socket * @wait: - poll table * * This is same as datagram poll, except for the special case of * blocking sockets. If application is using a blocking fd * and a packet with checksum error is in the queue; * then it could get return from select indicating data available * but then block when reading it. Add special case code * to work around these arguably broken applications. 
*/ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); /* psock ingress_msg queue should not contain any bad checksum frames */ if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { if (!has_current_bpf_ctx()) lock_sock(sk); /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing * with close() */ if (sock_flag(sk, SOCK_DEAD)) goto out; sk->sk_err = err; sk_error_report(sk); __udp_disconnect(sk, 0); out: if (!has_current_bpf_ctx()) release_sock(sk); return 0; } EXPORT_SYMBOL_GPL(udp_abort); struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, .close = udp_lib_close, .pre_connect = udp_pre_connect, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udp_init_sock, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .splice_eof = udp_splice_eof, .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .put_port = udp_lib_unhash, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif .memory_allocated = &udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = NULL, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS static unsigned short seq_file_family(const struct seq_file *seq); static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) { unsigned short family = seq_file_family(seq); /* AF_UNSPEC is used as a match all */ return ((family == AF_UNSPEC || family == sk->sk_family) && net_eq(sock_net(sk), seq_file_net(seq))); } #ifdef CONFIG_BPF_SYSCALL static const struct seq_operations bpf_iter_udp_seq_ops; #endif static struct udp_table *udp_get_table_seq(struct seq_file *seq, struct net *net) { const struct udp_seq_afinfo *afinfo; #ifdef CONFIG_BPF_SYSCALL if (seq->op == &bpf_iter_udp_seq_ops) return net->ipv4.udp_table; #endif afinfo = pde_data(file_inode(seq->file)); return afinfo->udp_table ? 
: net->ipv4.udp_table; } static struct sock *udp_get_first(struct seq_file *seq, int start) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); struct udp_table *udptable; struct sock *sk; udptable = udp_get_table_seq(seq, net); for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { struct udp_hslot *hslot = &udptable->hash[state->bucket]; if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); sk_for_each(sk, &hslot->head) { if (seq_sk_match(seq, sk)) goto found; } spin_unlock_bh(&hslot->lock); } sk = NULL; found: return sk; } static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); struct udp_table *udptable; do { sk = sk_next(sk); } while (sk && !seq_sk_match(seq, sk)); if (!sk) { udptable = udp_get_table_seq(seq, net); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } return sk; } static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) { struct sock *sk = udp_get_first(seq, 0); if (sk) while (pos && (sk = udp_get_next(seq, sk)) != NULL) --pos; return pos ? NULL : sk; } void *udp_seq_start(struct seq_file *seq, loff_t *pos) { struct udp_iter_state *state = seq->private; state->bucket = MAX_UDP_PORTS; return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } EXPORT_SYMBOL(udp_seq_start); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; if (v == SEQ_START_TOKEN) sk = udp_get_idx(seq, 0); else sk = udp_get_next(seq, v); ++*pos; return sk; } EXPORT_SYMBOL(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; struct udp_table *udptable; udptable = udp_get_table_seq(seq, seq_file_net(seq)); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); } EXPORT_SYMBOL(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), udp_rqueue_get(sp), 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; udp4_format_sock(v, seq, state->bucket); } seq_pad(seq, '\n'); return 0; } #ifdef CONFIG_BPF_SYSCALL struct bpf_iter__udp { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct udp_sock *, udp_sk); uid_t uid __aligned(8); int bucket __aligned(8); }; struct bpf_udp_iter_state { struct udp_iter_state state; unsigned int cur_sk; unsigned int end_sk; unsigned int max_sk; int offset; struct sock **batch; bool st_bucket_done; }; static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, unsigned int new_batch_sz); static struct sock *bpf_iter_udp_batch(struct seq_file *seq) { 
struct bpf_udp_iter_state *iter = seq->private; struct udp_iter_state *state = &iter->state; struct net *net = seq_file_net(seq); struct udp_table *udptable; unsigned int batch_sks = 0; bool resized = false; struct sock *sk; /* The current batch is done, so advance the bucket. */ if (iter->st_bucket_done) { state->bucket++; iter->offset = 0; } udptable = udp_get_table_seq(seq, net); again: /* New batch for the next bucket. * Iterate over the hash table to find a bucket with sockets matching * the iterator attributes, and return the first matching socket from * the bucket. The remaining matched sockets from the bucket are batched * before releasing the bucket lock. This allows BPF programs that are * called in seq_show to acquire the bucket lock if needed. */ iter->cur_sk = 0; iter->end_sk = 0; iter->st_bucket_done = false; batch_sks = 0; for (; state->bucket <= udptable->mask; state->bucket++) { struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; if (hlist_empty(&hslot2->head)) { iter->offset = 0; continue; } spin_lock_bh(&hslot2->lock); udp_portaddr_for_each_entry(sk, &hslot2->head) { if (seq_sk_match(seq, sk)) { /* Resume from the last iterated socket at the * offset in the bucket before iterator was stopped. */ if (iter->offset) { --iter->offset; continue; } if (iter->end_sk < iter->max_sk) { sock_hold(sk); iter->batch[iter->end_sk++] = sk; } batch_sks++; } } spin_unlock_bh(&hslot2->lock); if (iter->end_sk) break; /* Reset the current bucket's offset before moving to the next bucket. */ iter->offset = 0; } /* All done: no batch made. */ if (!iter->end_sk) return NULL; if (iter->end_sk == batch_sks) { /* Batching is done for the current bucket; return the first * socket to be iterated from the batch. */ iter->st_bucket_done = true; goto done; } if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { resized = true; /* After allocating a larger batch, retry one more time to grab * the whole bucket. */ state->bucket--; goto again; } done: return iter->batch[0]; } static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_udp_iter_state *iter = seq->private; struct sock *sk; /* Whenever seq_next() is called, the iter->cur_sk is * done with seq_show(), so unref the iter->cur_sk. */ if (iter->cur_sk < iter->end_sk) { sock_put(iter->batch[iter->cur_sk++]); ++iter->offset; } /* After updating iter->cur_sk, check if there are more sockets * available in the current bucket batch. */ if (iter->cur_sk < iter->end_sk) sk = iter->batch[iter->cur_sk]; else /* Prepare a new batch. */ sk = bpf_iter_udp_batch(seq); ++*pos; return sk; } static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos) { /* bpf iter does not support lseek, so it always * continue from where it was stop()-ped. 
*/ if (*pos) return bpf_iter_udp_batch(seq); return SEQ_START_TOKEN; } static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) { struct bpf_iter__udp ctx; meta->seq_num--; /* skip SEQ_START_TOKEN */ ctx.meta = meta; ctx.udp_sk = udp_sk; ctx.uid = uid; ctx.bucket = bucket; return bpf_iter_run_prog(prog, &ctx); } static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; uid_t uid; int ret; if (v == SEQ_START_TOKEN) return 0; lock_sock(sk); if (unlikely(sk_unhashed(sk))) { ret = SEQ_SKIP; goto unlock; } uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); unlock: release_sock(sk); return ret; } static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) { while (iter->cur_sk < iter->end_sk) sock_put(iter->batch[iter->cur_sk++]); } static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) { struct bpf_udp_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; if (!v) { meta.seq = seq; prog = bpf_iter_get_info(&meta, true); if (prog) (void)udp_prog_seq_show(prog, &meta, v, 0, 0); } if (iter->cur_sk < iter->end_sk) { bpf_iter_udp_put_batch(iter); iter->st_bucket_done = false; } } static const struct seq_operations bpf_iter_udp_seq_ops = { .start = bpf_iter_udp_seq_start, .next = bpf_iter_udp_seq_next, .stop = bpf_iter_udp_seq_stop, .show = bpf_iter_udp_seq_show, }; #endif static unsigned short seq_file_family(const struct seq_file *seq) { const struct udp_seq_afinfo *afinfo; #ifdef CONFIG_BPF_SYSCALL /* BPF iterator: bpf programs to filter sockets. 
*/ if (seq->op == &bpf_iter_udp_seq_ops) return AF_UNSPEC; #endif /* Proc fs iterator */ afinfo = pde_data(file_inode(seq->file)); return afinfo->family; } const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, .show = udp4_seq_show, }; EXPORT_SYMBOL(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, .udp_table = NULL, }; static int __net_init udp4_proc_init_net(struct net *net) { if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops, sizeof(struct udp_iter_state), &udp4_seq_afinfo)) return -ENOMEM; return 0; } static void __net_exit udp4_proc_exit_net(struct net *net) { remove_proc_entry("udp", net->proc_net); } static struct pernet_operations udp4_net_ops = { .init = udp4_proc_init_net, .exit = udp4_proc_exit_net, }; int __init udp4_proc_init(void) { return register_pernet_subsys(&udp4_net_ops); } void udp4_proc_exit(void) { unregister_pernet_subsys(&udp4_net_ops); } #endif /* CONFIG_PROC_FS */ static __initdata unsigned long uhash_entries; static int __init set_uhash_entries(char *str) { ssize_t ret; if (!str) return 0; ret = kstrtoul(str, 0, &uhash_entries); if (ret) return 0; if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) uhash_entries = UDP_HTABLE_SIZE_MIN; return 1; } __setup("uhash_entries=", set_uhash_entries); void __init udp_table_init(struct udp_table *table, const char *name) { unsigned int i; table->hash = alloc_large_system_hash(name, 2 * sizeof(struct udp_hslot), uhash_entries, 21, /* one slot per 2 MB */ 0, &table->log, &table->mask, UDP_HTABLE_SIZE_MIN, UDP_HTABLE_SIZE_MAX); table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { INIT_HLIST_HEAD(&table->hash[i].head); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } for (i = 0; i <= table->mask; i++) { INIT_HLIST_HEAD(&table->hash2[i].head); table->hash2[i].count = 0; spin_lock_init(&table->hash2[i].lock); } } u32 udp_flow_hashrnd(void) { static u32 hashrnd __read_mostly; net_get_random_once(&hashrnd, sizeof(hashrnd)); return hashrnd; } EXPORT_SYMBOL(udp_flow_hashrnd); static void __net_init udp_sysctl_init(struct net *net) { net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE; net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE; #ifdef CONFIG_NET_L3_MASTER_DEV net->ipv4.sysctl_udp_l3mdev_accept = 0; #endif } static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries) { struct udp_table *udptable; int i; udptable = kmalloc(sizeof(*udptable), GFP_KERNEL); if (!udptable) goto out; udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot), GFP_KERNEL_ACCOUNT); if (!udptable->hash) goto free_table; udptable->hash2 = udptable->hash + hash_entries; udptable->mask = hash_entries - 1; udptable->log = ilog2(hash_entries); for (i = 0; i < hash_entries; i++) { INIT_HLIST_HEAD(&udptable->hash[i].head); udptable->hash[i].count = 0; spin_lock_init(&udptable->hash[i].lock); INIT_HLIST_HEAD(&udptable->hash2[i].head); udptable->hash2[i].count = 0; spin_lock_init(&udptable->hash2[i].lock); } return udptable; free_table: kfree(udptable); out: return NULL; } static void __net_exit udp_pernet_table_free(struct net *net) { struct udp_table *udptable = net->ipv4.udp_table; if (udptable == &udp_table) return; kvfree(udptable->hash); kfree(udptable); } static void __net_init udp_set_table(struct net *net) { struct udp_table *udptable; unsigned int hash_entries; struct net *old_net; if (net_eq(net, &init_net)) goto fallback; old_net = current->nsproxy->net_ns; 
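	/* The hash size for a child netns comes from the creating netns'
	 * sysctl_udp_child_hash_entries; if that is zero the child simply
	 * shares the global udp_table via the fallback label below. */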
hash_entries = READ_ONCE(old_net->ipv4.sysctl_udp_child_hash_entries); if (!hash_entries) goto fallback; /* Set min to keep the bitmap on stack in udp_lib_get_port() */ if (hash_entries < UDP_HTABLE_SIZE_MIN_PERNET) hash_entries = UDP_HTABLE_SIZE_MIN_PERNET; else hash_entries = roundup_pow_of_two(hash_entries); udptable = udp_pernet_table_alloc(hash_entries); if (udptable) { net->ipv4.udp_table = udptable; } else { pr_warn("Failed to allocate UDP hash table (entries: %u) " "for a netns, fallback to the global one\n", hash_entries); fallback: net->ipv4.udp_table = &udp_table; } } static int __net_init udp_pernet_init(struct net *net) { udp_sysctl_init(net); udp_set_table(net); return 0; } static void __net_exit udp_pernet_exit(struct net *net) { udp_pernet_table_free(net); } static struct pernet_operations __net_initdata udp_sysctl_ops = { .init = udp_pernet_init, .exit = udp_pernet_exit, }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, unsigned int new_batch_sz) { struct sock **new_batch; new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), GFP_USER | __GFP_NOWARN); if (!new_batch) return -ENOMEM; bpf_iter_udp_put_batch(iter); kvfree(iter->batch); iter->batch = new_batch; iter->max_sk = new_batch_sz; return 0; } #define INIT_BATCH_SZ 16 static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_udp_iter_state *iter = priv_data; int ret; ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) return ret; ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); if (ret) bpf_iter_fini_seq_net(priv_data); return ret; } static void bpf_iter_fini_udp(void *priv_data) { struct bpf_udp_iter_state *iter = priv_data; bpf_iter_fini_seq_net(priv_data); kvfree(iter->batch); } static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, .seq_priv_size = sizeof(struct bpf_udp_iter_state), }; static struct bpf_iter_reg udp_reg_info = { .target = "udp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &udp_seq_info, }; static void __init bpf_iter_register(void) { udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; if (bpf_iter_reg_target(&udp_reg_info)) pr_warn("Warning: could not register bpf iterator udp\n"); } #endif void __init udp_init(void) { unsigned long limit; unsigned int i; udp_table_init(&udp_table, "UDP"); limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; /* 16 spinlocks per cpu */ udp_busylocks_log = ilog2(nr_cpu_ids) + 4; udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log, GFP_KERNEL); if (!udp_busylocks) panic("UDP: failed to alloc udp_busylocks\n"); for (i = 0; i < (1U << udp_busylocks_log); i++) spin_lock_init(udp_busylocks + i); if (register_pernet_subsys(&udp_sysctl_ops)) panic("UDP: failed to init sysctl parameters.\n"); #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_register(); #endif } |
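/*
 * Example (not part of the kernel file above): a minimal user-space sketch of
 * how the UDP_SEGMENT and UDP_GRO cases handled by udp_lib_setsockopt() are
 * typically exercised. The constants come from the UAPI header <linux/udp.h>;
 * the function name and the 1400-byte segment size are illustrative only.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/udp.h>		/* UDP_SEGMENT, UDP_GRO */

static int open_gso_udp_socket(void)
{
	int gso_size = 1400;	/* payload bytes per segment; must fit the path MTU */
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	/* Have the kernel split large sendmsg() payloads into 1400-byte
	 * UDP segments (the UDP_SEGMENT case above). */
	if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size)))
		perror("UDP_SEGMENT");

	/* Accept GRO-coalesced datagrams on receive (the UDP_GRO case above). */
	if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &one, sizeof(one)))
		perror("UDP_GRO");

	return fd;
}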
1263 665 2 666 1 187 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_KSM_H #define __LINUX_KSM_H /* * Memory merging support. * * This code enables dynamic sharing of identical pages found in different * memory areas, even if they are not shared by fork(). */ #include <linux/bitops.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/sched.h> #include <linux/sched/coredump.h> #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); int ksm_disable(struct mm_struct *mm); int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); /* * To identify zeropages that were mapped by KSM, we reuse the dirty bit * in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when * deduplicating memory. */ #define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte)) extern unsigned long ksm_zero_pages; static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { if (is_ksm_zero_pte(pte)) { ksm_zero_pages--; mm->ksm_zero_pages--; } } static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { int ret; if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) { ret = __ksm_enter(mm); if (ret) return ret; } if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags)) set_bit(MMF_VM_MERGE_ANY, &mm->flags); return 0; } static inline void ksm_exit(struct mm_struct *mm) { if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) __ksm_exit(mm); } /* * When do_swap_page() first faults in from swap what used to be a KSM page, * no problem, it will be assigned to this vma's anon_vma; but thereafter, * it might be faulted into a different anon_vma (or perhaps to a different * offset in the same anon_vma). do_swap_page() cannot do all the locking * needed to reconstitute a cross-anon_vma KSM page: for now it has to make * a copy, and leave remerging the pages to a later pass of ksmd. * * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? 
*/ struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #ifdef CONFIG_MEMORY_FAILURE void collect_procs_ksm(struct page *page, struct list_head *to_kill, int force_early); #endif #ifdef CONFIG_PROC_FS long ksm_process_profit(struct mm_struct *); #endif /* CONFIG_PROC_FS */ #else /* !CONFIG_KSM */ static inline void ksm_add_vma(struct vm_area_struct *vma) { } static inline int ksm_disable(struct mm_struct *mm) { return 0; } static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { return 0; } static inline void ksm_exit(struct mm_struct *mm) { } static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { } #ifdef CONFIG_MEMORY_FAILURE static inline void collect_procs_ksm(struct page *page, struct list_head *to_kill, int force_early) { } #endif #ifdef CONFIG_MMU static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) { return 0; } static inline struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { return page; } static inline void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc) { } static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ #endif /* !CONFIG_KSM */ #endif /* __LINUX_KSM_H */ |
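/*
 * Example (not part of the header above): a minimal user-space sketch of the
 * two paths into the CONFIG_KSM hooks declared above. madvise(MADV_MERGEABLE)
 * marks a single mapping as mergeable (reaching ksm_madvise()), while
 * prctl(PR_SET_MEMORY_MERGE, 1) opts the whole process in (reaching
 * ksm_enable_merge_any()) on kernels that provide that prctl.
 */
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

#define REGION_SIZE	(16UL * 1024 * 1024)

int main(void)
{
	void *buf = mmap(NULL, REGION_SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return EXIT_FAILURE;

	/* Per-VMA opt-in: only this mapping becomes a KSM candidate. */
	if (madvise(buf, REGION_SIZE, MADV_MERGEABLE))
		return EXIT_FAILURE;

	/* Process-wide opt-in: current and future anonymous mappings. */
	if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0))
		return EXIT_FAILURE;

	/* ... fill buf with identical pages and let ksmd merge them ... */
	return EXIT_SUCCESS;
}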
222 224 17 4 4 222 386 385 51 9 15 269 102 168 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_queue.h> #include <net/ip6_checksum.h> #ifdef CONFIG_INET __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u8 protocol) { const struct iphdr *iph = ip_hdr(skb); __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) break; if ((protocol != IPPROTO_TCP && protocol != IPPROTO_UDP && !csum_fold(skb->csum)) || !csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - dataoff, protocol, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } fallthrough; case CHECKSUM_NONE: if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP) skb->csum = 0; else skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len - dataoff, protocol, 0); csum = __skb_checksum_complete(skb); } return csum; } EXPORT_SYMBOL(nf_ip_checksum); #endif static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u8 protocol) { const struct iphdr *iph = ip_hdr(skb); __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (len == skb->len - dataoff) return nf_ip_checksum(skb, hook, dataoff, protocol); fallthrough; case CHECKSUM_NONE: skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, skb->len - dataoff, 0); skb->ip_summed = CHECKSUM_NONE; return __skb_checksum_complete_head(skb, dataoff + len); } return csum; } __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u8 protocol) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, csum_sub(skb->csum, skb_checksum(skb, 0, dataoff, 0)))) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } fallthrough; case CHECKSUM_NONE: skb->csum = ~csum_unfold( csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, csum_sub(0, skb_checksum(skb, 0, dataoff, 0)))); csum = __skb_checksum_complete(skb); } return csum; } EXPORT_SYMBOL(nf_ip6_checksum); static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u8 protocol) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); __wsum hsum; __sum16 csum = 0; switch 
(skb->ip_summed) { case CHECKSUM_COMPLETE: if (len == skb->len - dataoff) return nf_ip6_checksum(skb, hook, dataoff, protocol); fallthrough; case CHECKSUM_NONE: hsum = skb_checksum(skb, 0, dataoff, 0); skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, csum_sub(0, hsum))); skb->ip_summed = CHECKSUM_NONE; return __skb_checksum_complete_head(skb, dataoff + len); } return csum; }; __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u8 protocol, unsigned short family) { __sum16 csum = 0; switch (family) { case AF_INET: csum = nf_ip_checksum(skb, hook, dataoff, protocol); break; case AF_INET6: csum = nf_ip6_checksum(skb, hook, dataoff, protocol); break; } return csum; } EXPORT_SYMBOL_GPL(nf_checksum); __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u8 protocol, unsigned short family) { __sum16 csum = 0; switch (family) { case AF_INET: csum = nf_ip_checksum_partial(skb, hook, dataoff, len, protocol); break; case AF_INET6: csum = nf_ip6_checksum_partial(skb, hook, dataoff, len, protocol); break; } return csum; } EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { case AF_INET: ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: ret = nf_ip6_route(net, dst, fl, strict); break; } return ret; } EXPORT_SYMBOL_GPL(nf_route); static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { #ifdef CONFIG_INET const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos && skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) return ip_route_me_harder(entry->state.net, entry->state.sk, skb, RTN_UNSPEC); } #endif return 0; } int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { const struct nf_ipv6_ops *v6ops; int ret = 0; switch (entry->state.pf) { case AF_INET: ret = nf_ip_reroute(skb, entry); break; case AF_INET6: v6ops = rcu_dereference(nf_ipv6_ops); if (v6ops) ret = v6ops->reroute(skb, entry); break; } return ret; } /* Only get and check the lengths, not do any hop-by-hop stuff. */ int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) { int len, off = sizeof(struct ipv6hdr); unsigned char *nh; if (!pskb_may_pull(skb, off + 8)) return -ENOMEM; nh = (unsigned char *)(ipv6_hdr(skb) + 1); len = (nh[1] + 1) << 3; if (!pskb_may_pull(skb, off + len)) return -ENOMEM; nh = skb_network_header(skb); off += 2; len -= 2; while (len > 0) { int optlen; if (nh[off] == IPV6_TLV_PAD1) { off++; len--; continue; } if (len < 2) return -EBADMSG; optlen = nh[off + 1] + 2; if (optlen > len) return -EBADMSG; if (nh[off] == IPV6_TLV_JUMBO) { u32 pkt_len; if (nh[off + 1] != 4 || (off & 3) != 2) return -EBADMSG; pkt_len = ntohl(*(__be32 *)(nh + off + 2)); if (pkt_len <= IPV6_MAXPLEN || ipv6_hdr(skb)->payload_len) return -EBADMSG; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) return -EBADMSG; *plen = pkt_len; } off += optlen; len -= optlen; } return len ? -EBADMSG : 0; } EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); |
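/*
 * Example (not part of the file above): a hypothetical netfilter hook sketch
 * showing the intended use of nf_checksum() to validate a TCP checksum in a
 * family-independent way. Only nf_checksum() comes from this file; the hook
 * itself is illustrative and assumes the includes already present above.
 */
static unsigned int example_csum_hook(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	unsigned int thoff = skb_transport_offset(skb);

	/* A non-zero result means the checksum did not verify. */
	if (nf_checksum(skb, state->hook, thoff, IPPROTO_TCP, state->pf))
		return NF_DROP;

	return NF_ACCEPT;
}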
2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | // SPDX-License-Identifier: GPL-2.0-only /* module that allows mangling of the arp payload */ #include <linux/module.h> #include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int target(struct sk_buff *skb, const struct xt_action_param *par) { const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; if (skb_ensure_writable(skb, skb->len)) return NF_DROP; arp = arp_hdr(skb); arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } return mangle->target; } static int checkentry(const struct xt_tgchk_param *par) { const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != XT_CONTINUE) return -EINVAL; return 0; } static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, .me = THIS_MODULE, }; static int __init arpt_mangle_init(void) { return xt_register_target(&arpt_mangle_reg); } static void __exit arpt_mangle_fini(void) { xt_unregister_target(&arpt_mangle_reg); } module_init(arpt_mangle_init); module_exit(arpt_mangle_fini); |
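/*
 * Illustrative layout (not part of the module above): for the common
 * Ethernet/IPv4 case (ar_hln == 6, ar_pln == 4) the ARP payload that target()
 * walks via arpptr has the shape below; each ARPT_MANGLE_* flag overwrites one
 * field. The struct is a sketch for clarity only, the real code deliberately
 * uses ar_hln/ar_pln so that other link and protocol address sizes keep working.
 */
struct arpt_mangle_eth_ipv4_layout {
	unsigned char	sender_hw[6];	/* ARPT_MANGLE_SDEV -> mangle->src_devaddr */
	unsigned char	sender_ip[4];	/* ARPT_MANGLE_SIP  -> mangle->u_s.src_ip  */
	unsigned char	target_hw[6];	/* ARPT_MANGLE_TDEV -> mangle->tgt_devaddr */
	unsigned char	target_ip[4];	/* ARPT_MANGLE_TIP  -> mangle->u_t.tgt_ip  */
};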
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/resize.c * * Support for resizing an ext4 filesystem while it is mounted. * * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> * * This could probably be made into a module, because it is not often in use.
*/ #include <linux/errno.h> #include <linux/slab.h> #include <linux/jiffies.h> #include "ext4_jbd2.h" struct ext4_rcu_ptr { struct rcu_head rcu; void *ptr; }; static void ext4_rcu_ptr_callback(struct rcu_head *head) { struct ext4_rcu_ptr *ptr; ptr = container_of(head, struct ext4_rcu_ptr, rcu); kvfree(ptr->ptr); kfree(ptr); } void ext4_kvfree_array_rcu(void *to_free) { struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); if (ptr) { ptr->ptr = to_free; call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); return; } synchronize_rcu(); kvfree(to_free); } int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); int ret = 0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; /* * If the reserved GDT blocks is non-zero, the resize_inode feature * should always be set. */ if (sbi->s_es->s_reserved_gdt_blocks && !ext4_has_feature_resize_inode(sb)) { ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); return -EFSCORRUPTED; } /* * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a * bad time to do it anyways. */ if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) != le32_to_cpu(sbi->s_es->s_first_data_block)) { ext4_warning(sb, "won't resize using backup superblock at %llu", (unsigned long long)sbi->s_sbh->b_blocknr); return -EPERM; } /* * We are not allowed to do online-resizing on a filesystem mounted * with error, because it can destroy the filesystem easily. */ if (sbi->s_mount_state & EXT4_ERROR_FS) { ext4_warning(sb, "There are errors in the filesystem, " "so online resizing is not allowed"); return -EPERM; } if (ext4_has_feature_sparse_super2(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2"); return -EOPNOTSUPP; } if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags)) ret = -EBUSY; return ret; } int ext4_resize_end(struct super_block *sb, bool update_backups) { clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags); smp_mb__after_atomic(); if (update_backups) return ext4_update_overhead(sb, true); return 0; } static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, ext4_group_t group) { ext4_grpblk_t overhead; overhead = ext4_bg_num_gdb(sb, group); if (ext4_bg_has_super(sb, group)) overhead += 1 + le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); return overhead; } #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) static int verify_group_input(struct super_block *sb, struct ext4_new_group_data *input) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; ext4_fsblk_t start = ext4_blocks_count(es); ext4_fsblk_t end = start + input->blocks_count; ext4_group_t group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead; ext4_fsblk_t metaend; struct buffer_head *bh = NULL; ext4_grpblk_t free_blocks_count, offset; int err = -EINVAL; if (group != sbi->s_groups_count) { ext4_warning(sb, "Cannot add at group %u (only %u groups)", input->group, sbi->s_groups_count); return -EINVAL; } overhead = ext4_group_overhead_blocks(sb, group); metaend = start + overhead; free_blocks_count = input->blocks_count - 2 - overhead - sbi->s_itb_per_group; input->free_clusters_count = EXT4_B2C(sbi, free_blocks_count); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " "(%d free, %u reserved)\n", ext4_bg_has_super(sb, input->group) ? 
"normal" : "no-super", input->group, input->blocks_count, free_blocks_count, input->reserved_blocks); ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (offset != 0) ext4_warning(sb, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) ext4_warning(sb, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) ext4_warning(sb, "Bad blocks count %u", input->blocks_count); else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) { err = PTR_ERR(bh); bh = NULL; ext4_warning(sb, "Cannot read last block (%llu)", end - 1); } else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) ext4_warning(sb, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) ext4_warning(sb, "Block bitmap (%llu) in inode table " "(%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) ext4_warning(sb, "Inode bitmap (%llu) in inode table " "(%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " "(%llu-%llu)", (unsigned long long)input->inode_table, itend - 1, start, metaend - 1); else err = 0; brelse(bh); return err; } /* * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex * group each time. */ struct ext4_new_flex_group_data { struct ext4_new_group_data *groups; /* new_group_data for groups in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ ext4_group_t count; /* number of groups in @groups */ }; /* * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of * @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. 
*/ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) { struct ext4_new_flex_group_data *flex_gd; flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); if (flex_gd == NULL) goto out3; if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; flex_gd->count = flexbg_size; flex_gd->groups = kmalloc_array(flexbg_size, sizeof(struct ext4_new_group_data), GFP_NOFS); if (flex_gd->groups == NULL) goto out2; flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16), GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; return flex_gd; out1: kfree(flex_gd->groups); out2: kfree(flex_gd); out3: return NULL; } static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) { kfree(flex_gd->bg_flags); kfree(flex_gd->groups); kfree(flex_gd); } /* * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps * and inode tables for a flex group. * * This function is used by 64bit-resize. Note that this function allocates * group tables from the 1st group of groups contained by @flexgd, which may * be a partial of a flex group. * * @sb: super block of fs to which the groups belongs * * Returns 0 on a successful allocation of the metadata blocks in the * block group. */ static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t start_blk; ext4_fsblk_t last_blk; ext4_group_t src_group; ext4_group_t bb_index = 0; ext4_group_t ib_index = 0; ext4_group_t it_index = 0; ext4_group_t group; ext4_group_t last_group; unsigned overhead; __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; int i; BUG_ON(flex_gd->count == 0 || group_data == NULL); src_group = group_data[0].group; last_group = src_group + flex_gd->count - 1; BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) != (last_group & ~(flexbg_size - 1)))); next_group: group = group_data[0].group; if (src_group >= group_data[0].group + flex_gd->count) return -ENOSPC; start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; overhead = ext4_group_overhead_blocks(sb, src_group); start_blk += overhead; /* We collect contiguous blocks as much as possible. 
*/ src_group++; for (; src_group <= last_group; src_group++) { overhead = ext4_group_overhead_blocks(sb, src_group); if (overhead == 0) last_blk += group_data[src_group - group].blocks_count; else break; } /* Allocate block bitmaps */ for (; bb_index < flex_gd->count; bb_index++) { if (start_blk >= last_blk) goto next_group; group_data[bb_index].block_bitmap = start_blk++; group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].mdata_blocks++; flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode bitmaps */ for (; ib_index < flex_gd->count; ib_index++) { if (start_blk >= last_blk) goto next_group; group_data[ib_index].inode_bitmap = start_blk++; group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].mdata_blocks++; flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode tables */ for (; it_index < flex_gd->count; it_index++) { unsigned int itb = EXT4_SB(sb)->s_itb_per_group; ext4_fsblk_t next_group_start; if (start_blk + itb > last_blk) goto next_group; group_data[it_index].inode_table = start_blk; group = ext4_get_group_number(sb, start_blk); next_group_start = ext4_group_first_block_no(sb, group + 1); group -= group_data[0].group; if (start_blk + itb > next_group_start) { flex_gd->bg_flags[group + 1] &= uninit_mask; overhead = start_blk + itb - next_group_start; group_data[group + 1].mdata_blocks += overhead; itb -= overhead; } group_data[group].mdata_blocks += itb; flex_gd->bg_flags[group] &= uninit_mask; start_blk += EXT4_SB(sb)->s_itb_per_group; } /* Update free clusters count to exclude metadata blocks */ for (i = 0; i < flex_gd->count; i++) { group_data[i].free_clusters_count -= EXT4_NUM_B2C(EXT4_SB(sb), group_data[i].mdata_blocks); } if (test_opt(sb, DEBUG)) { int i; group = group_data[0].group; printk(KERN_DEBUG "EXT4-fs: adding a flex group with " "%d groups, flexbg size is %d:\n", flex_gd->count, flexbg_size); for (i = 0; i < flex_gd->count; i++) { ext4_debug( "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", ext4_bg_has_super(sb, group + i) ? "normal" : "no-super", group + i, group_data[i].blocks_count, group_data[i].free_clusters_count, group_data[i].mdata_blocks); } } return 0; } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, ext4_fsblk_t blk) { struct buffer_head *bh; int err; bh = sb_getblk(sb, blk); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) { brelse(bh); bh = ERR_PTR(err); } else { memset(bh->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bh); } return bh; } static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits) { return ext4_journal_ensure_credits_fn(handle, credits, EXT4_MAX_TRANS_DATA, 0, 0); } /* * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used. * * Helper function for ext4_setup_new_group_blocks() which set . 
* * @sb: super block * @handle: journal handle * @flex_gd: flex group data */ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, struct ext4_new_flex_group_data *flex_gd, ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t count = last_cluster - first_cluster + 1; ext4_group_t count2; ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster, last_cluster); for (; count > 0; count -= count2, first_cluster += count2) { ext4_fsblk_t start; struct buffer_head *bh; ext4_group_t group; int err; group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster)); start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group)); group -= flex_gd->groups[0].group; count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start); if (count2 > count) count2 = count; if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { BUG_ON(flex_gd->count > 1); continue; } err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) return err; bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); if (unlikely(!bh)) return -ENOMEM; BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) { brelse(bh); return err; } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", first_cluster, first_cluster - start, count2); mb_set_bits(bh->b_data, first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); if (unlikely(err)) return err; } return 0; } /* * Set up the block and inode bitmaps, and the inode table for the new groups. * This doesn't need to be part of the main transaction, since we are only * changing blocks outside the actual filesystem. We still do journaling to * ensure the recovery is correct in case of a failure just after resize. * If any part of this fails, we simply abort the resize. * * setup_new_flex_group_blocks handles a flex group as follow: * 1. copy super block and GDT, and initialize group tables if necessary. * In this step, we only set bits in blocks bitmaps for blocks taken by * super block and GDT. * 2. allocate group tables in block bitmaps, that is, set bits in block * bitmap for blocks taken by group tables. 
*/ static int setup_new_flex_group_blocks(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd) { int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; ext4_fsblk_t start; ext4_fsblk_t block; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct ext4_new_group_data *group_data = flex_gd->groups; __u16 *bg_flags = flex_gd->bg_flags; handle_t *handle; ext4_group_t group, count; struct buffer_head *bh = NULL; int reserved_gdb, i, j, err = 0, err2; int meta_bg; BUG_ON(!flex_gd->count || !group_data || group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); meta_bg = ext4_has_feature_meta_bg(sb); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); if (IS_ERR(handle)) return PTR_ERR(handle); group = group_data[0].group; for (i = 0; i < flex_gd->count; i++, group++) { unsigned long gdblocks; ext4_grpblk_t overhead; gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; if (meta_bg == 1) goto handle_itb; block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ for (j = 0; j < gdblocks; j++, block++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx\n", block); err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) goto out; gdb = sb_getblk(sb, block); if (unlikely(!gdb)) { err = -ENOMEM; goto out; } BUFFER_TRACE(gdb, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, gdb, EXT4_JTR_NONE); if (err) { brelse(gdb); goto out; } memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); goto out; } brelse(gdb); } /* Zero out all of the reserved backup group descriptor * table blocks */ if (ext4_bg_has_super(sb, group)) { err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, GFP_NOFS); if (err) goto out; } handle_itb: /* Initialize group tables of the group @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb; /* Zero out all of the inode table blocks */ block = group_data[i].inode_table; ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", block, sbi->s_itb_per_group); err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); if (err) goto out; handle_bb: if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) goto handle_ib; /* Initialize block bitmap of the @group */ block = group_data[i].block_bitmap; err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) goto out; bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); goto out; } overhead = ext4_group_overhead_blocks(sb, group); if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); mb_set_bits(bh->b_data, 0, EXT4_NUM_B2C(sbi, overhead)); } ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); if (err) goto out; handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) continue; /* Initialize inode bitmap of the @group */ block = group_data[i].inode_bitmap; err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) goto out; /* Mark unused entries in inode bitmap used */ bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); goto out; } 
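/* Added note: ext4_mark_bitmap_end() below pads the tail of the inode bitmap — bits from EXT4_INODES_PER_GROUP(sb) up to the end of the bitmap block map to no real inode, so they are set here and stay out of the allocator. */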
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); if (err) goto out; } /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { count = group_table_count[j]; start = (&group_data[0].block_bitmap)[j]; block = start; for (i = 1; i < flex_gd->count; i++) { block += group_table_count[j]; if (block == (&group_data[i].block_bitmap)[j]) { count += group_table_count[j]; continue; } err = set_flexbg_block_bitmap(sb, handle, flex_gd, EXT4_B2C(sbi, start), EXT4_B2C(sbi, start + count - 1)); if (err) goto out; count = group_table_count[j]; start = (&group_data[i].block_bitmap)[j]; block = start; } err = set_flexbg_block_bitmap(sb, handle, flex_gd, EXT4_B2C(sbi, start), EXT4_B2C(sbi, start + count - 1)); if (err) goto out; } out: err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; return err; } /* * Iterate through the groups which hold BACKUP superblock/GDT copies in an * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before * calling this for the first time. In a sparse filesystem it will be the * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... */ unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three, unsigned int *five, unsigned int *seven) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsigned int *min = three; int mult = 3; unsigned int ret; if (ext4_has_feature_sparse_super2(sb)) { do { if (*min > 2) return UINT_MAX; ret = le32_to_cpu(es->s_backup_bgs[*min - 1]); *min += 1; } while (!ret); return ret; } if (!ext4_has_feature_sparse_super(sb)) { ret = *min; *min += 1; return ret; } if (*five < *min) { min = five; mult = 5; } if (*seven < *min) { min = seven; mult = 7; } ret = *min; *min *= mult; return ret; } /* * Check that all of the backup GDT blocks are held in the primary GDT block. * It is assumed that they are stored in group order. Returns the number of * groups in current filesystem that have BACKUPS, or -ve error code. */ static int verify_reserved_gdb(struct super_block *sb, ext4_group_t end, struct buffer_head *primary) { const ext4_fsblk_t blk = primary->b_blocknr; unsigned three = 1; unsigned five = 5; unsigned seven = 7; unsigned grp; __le32 *p = (__le32 *)primary->b_data; int gdbackups = 0; while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ ext4_warning(sb, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, grp * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; } if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) return -EFBIG; } return gdbackups; } /* * Called when we need to bring a reserved group descriptor table block into * use from the resize inode. The primary copy of the new GDT block currently * is an indirect block (under the double indirect block in the resize inode). * The new backup GDT blocks will be stored as leaf blocks in this indirect * block, in group order. Even though we know all the block numbers we need, * we check to ensure that the resize inode has actually reserved these blocks. * * Don't need to update the block bitmaps because the blocks are still in use. * * We get all of the error cases out of the way, so that we are sure to not * fail once we start modifying the data on disk, because JBD has no rollback. 
*/ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_group_t group) { struct super_block *sb = inode->i_sb; struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc = NULL; struct buffer_head *dind = NULL; struct buffer_head *gdb_bh = NULL; int gdbackups; struct ext4_iloc iloc = { .bh = NULL }; __le32 *data; int err; if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); gdb_bh = ext4_sb_bread(sb, gdblock, 0); if (IS_ERR(gdb_bh)) return PTR_ERR(gdb_bh); gdbackups = verify_reserved_gdb(sb, group, gdb_bh); if (gdbackups < 0) { err = gdbackups; goto errout; } data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); if (IS_ERR(dind)) { err = PTR_ERR(dind); dind = NULL; goto errout; } data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { ext4_warning(sb, "new group %u GDT block %llu not reserved", group, gdblock); err = -EINVAL; goto errout; } BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh, EXT4_JTR_NONE); if (unlikely(err)) goto errout; BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE); if (unlikely(err)) goto errout; BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, dind, EXT4_JTR_NONE); if (unlikely(err)) { ext4_std_error(sb, err); goto errout; } /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); if (unlikely(err)) goto errout; n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), GFP_KERNEL); if (!n_group_desc) { err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); goto errout; } /* * Finally, we have all of the possible failures behind us... * * Remove new GDT block from inode double-indirect block and clear out * the new GDT block for use (which also "frees" the backup GDT blocks * from the reserved inode). We don't need to change the bitmaps for * these blocks, because they are marked as in-use from being in the * reserved inode, and will become GDT blocks (primary and backup). 
*/ data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; err = ext4_handle_dirty_metadata(handle, NULL, dind); if (unlikely(err)) { ext4_std_error(sb, err); goto errout; } inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> (9 - EXT4_SB(sb)->s_cluster_bits); ext4_mark_iloc_dirty(handle, inode, &iloc); memset(gdb_bh->b_data, 0, sb->s_blocksize); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); iloc.bh = NULL; goto errout; } brelse(dind); rcu_read_lock(); o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; ext4_kvfree_array_rcu(o_group_desc); lock_buffer(EXT4_SB(sb)->s_sbh); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); return err; errout: kvfree(n_group_desc); brelse(iloc.bh); brelse(dind); brelse(gdb_bh); ext4_debug("leaving with error %d\n", err); return err; } /* * If there is no available space in the existing block group descriptors for * the new block group and there are no reserved block group descriptors, then * the meta_bg feature will get enabled, and es->s_first_meta_bg will get set * to the first block group that is managed using meta_bg and s_first_meta_bg * must be a multiple of EXT4_DESC_PER_BLOCK(sb). * This function will be called when first group of meta_bg is added to bring * new group descriptors block of new added meta_bg. */ static int add_new_gdb_meta_bg(struct super_block *sb, handle_t *handle, ext4_group_t group) { ext4_fsblk_t gdblock; struct buffer_head *gdb_bh; struct buffer_head **o_group_desc, **n_group_desc; unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int err; gdblock = ext4_group_first_block_no(sb, group) + ext4_bg_has_super(sb, group); gdb_bh = ext4_sb_bread(sb, gdblock, 0); if (IS_ERR(gdb_bh)) return PTR_ERR(gdb_bh); n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), GFP_KERNEL); if (!n_group_desc) { brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); return err; } rcu_read_lock(); o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE); if (err) { kvfree(n_group_desc); brelse(gdb_bh); return err; } rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; ext4_kvfree_array_rcu(o_group_desc); return err; } /* * Called when we are adding a new group which has a backup copy of each of * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. * We need to add these reserved backup GDT blocks to the resize inode, so * that they are kept for future resizing and not allocated to files. * * Each reserved backup GDT block will go into a different indirect block. * The indirect blocks are actually the primary reserved GDT blocks, * so we know in advance what their block numbers are. We only get the * double-indirect block to verify it is pointing to the primary reserved * GDT blocks so we don't overwrite a data block by accident. 
The reserved * backup GDT blocks are stored in their reserved primary GDT block. */ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, ext4_group_t group) { struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); int cluster_bits = EXT4_SB(sb)->s_cluster_bits; struct buffer_head **primary; struct buffer_head *dind; struct ext4_iloc iloc; ext4_fsblk_t blk; __le32 *data, *end; int gdbackups = 0; int res, i; int err; primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS); if (!primary) return -ENOMEM; data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); if (IS_ERR(dind)) { err = PTR_ERR(dind); dind = NULL; goto exit_free; } blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % EXT4_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext4_warning(sb, "reserved block %llu" " not at offset %ld", blk, (long)(data - (__le32 *)dind->b_data)); err = -EINVAL; goto exit_bh; } primary[res] = ext4_sb_bread(sb, blk, 0); if (IS_ERR(primary[res])) { err = PTR_ERR(primary[res]); primary[res] = NULL; goto exit_bh; } gdbackups = verify_reserved_gdb(sb, group, primary[res]); if (gdbackups < 0) { brelse(primary[res]); err = gdbackups; goto exit_bh; } if (++data >= end) data = (__le32 *)dind->b_data; } for (i = 0; i < reserved_gdb; i++) { BUFFER_TRACE(primary[i], "get_write_access"); if ((err = ext4_journal_get_write_access(handle, sb, primary[i], EXT4_JTR_NONE))) goto exit_bh; } if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) goto exit_bh; /* * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block. */ blk = group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); if (!err) err = err2; } inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits); ext4_mark_iloc_dirty(handle, inode, &iloc); exit_bh: while (--res >= 0) brelse(primary[res]); brelse(dind); exit_free: kfree(primary); return err; } static inline void ext4_set_block_group_nr(struct super_block *sb, char *data, ext4_group_t group) { struct ext4_super_block *es = (struct ext4_super_block *) data; es->s_block_group_nr = cpu_to_le16(group); if (ext4_has_metadata_csum(sb)) es->s_checksum = ext4_superblock_csum(sb, es); } /* * Update the backup copies of the ext4 metadata. These don't need to be part * of the main resize transaction, because e2fsck will re-write them if there * is a problem (basically only OOM will cause a problem). However, we * _should_ update the backups if possible, in case the primary gets trashed * for some reason and we need to run e2fsck from a backup superblock. The * important part is that the new block and inode counts are in the backup * superblocks, and the location of the new group metadata in the GDT backups. * * We do not need take the s_resize_lock for this, because these * blocks are not otherwise touched by the filesystem code when it is * mounted. 
We don't need to worry about last changing from * sbi->s_groups_count, because the worst that can happen is that we * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t last; const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; unsigned five = 5; unsigned seven = 7; ext4_group_t group = 0; int rest = sb->s_blocksize - size; handle_t *handle; int err = 0, err2; handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); if (IS_ERR(handle)) { group = 1; err = PTR_ERR(handle); goto exit_err; } if (meta_bg == 0) { group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; } else { group = ext4_get_group_number(sb, blk_off) + 1; last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); } while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; int has_super = ext4_bg_has_super(sb, group); ext4_fsblk_t first_block = ext4_group_first_block_no(sb, group); /* Out of journal space, and can't get more - abort - so sad */ err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) break; if (meta_bg == 0) backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else backup_block = first_block + has_super; bh = sb_getblk(sb, backup_block); if (unlikely(!bh)) { err = -ENOMEM; break; } ext4_debug("update metadata backup %llu(+%llu)\n", backup_block, backup_block - ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE))) { brelse(bh); break; } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); if (has_super && (backup_block == first_block)) ext4_set_block_group_nr(sb, bh->b_data, group); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); if (unlikely(err)) ext4_std_error(sb, err); brelse(bh); if (meta_bg == 0) group = ext4_list_backups(sb, &three, &five, &seven); else if (group == last) break; else group = last; } if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; /* * Ugh! Need to have e2fsck write the backup copies. It is too * late to revert the resize, we shouldn't fail just because of * the backup copies (they are only needed in case of corruption). * * However, if we got here we have a journal problem too, so we * can't really start a transaction to mark the superblock. * Chicken out and just set the flag on the hope it will be written * to disk, and if not - we will simply wait until next fsck. */ exit_err: if (err) { ext4_warning(sb, "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); mark_buffer_dirty(sbi->s_sbh); } } /* * ext4_add_new_descs() adds @count group descriptor of groups * starting at @group * * @handle: journal handle * @sb: super block * @group: the group no. 
of the first group desc to be added * @resize_inode: the resize inode * @count: number of group descriptors to be added */ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, ext4_group_t group, struct inode *resize_inode, ext4_group_t count) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *gdb_bh; int i, gdb_off, gdb_num, err = 0; int meta_bg; meta_bg = ext4_has_feature_meta_bg(sb); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; gdb_off = group % EXT4_DESC_PER_BLOCK(sb); gdb_num = group / EXT4_DESC_PER_BLOCK(sb); /* * We will only either add reserved group blocks to a backup group * or remove reserved blocks for the first group in a new group block. * Doing both would be mean more complex code, and sane people don't * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE); if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) err = reserve_backup_gdb(handle, resize_inode, group); } else if (meta_bg != 0) { err = add_new_gdb_meta_bg(sb, handle, group); } else { err = add_new_gdb(handle, resize_inode, group); } if (err) break; } return err; } static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) { struct buffer_head *bh = sb_getblk(sb, block); if (unlikely(!bh)) return NULL; if (!bh_uptodate_or_lock(bh)) { if (ext4_read_bh(bh, 0, NULL) < 0) { brelse(bh); return NULL; } } return bh; } static int ext4_set_bitmap_checksums(struct super_block *sb, struct ext4_group_desc *gdp, struct ext4_new_group_data *group_data) { struct buffer_head *bh; if (!ext4_has_metadata_csum(sb)) return 0; bh = ext4_get_bitmap(sb, group_data->inode_bitmap); if (!bh) return -EIO; ext4_inode_bitmap_csum_set(sb, gdp, bh, EXT4_INODES_PER_GROUP(sb) / 8); brelse(bh); bh = ext4_get_bitmap(sb, group_data->block_bitmap); if (!bh) return -EIO; ext4_block_bitmap_csum_set(sb, gdp, bh); brelse(bh); return 0; } /* * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg */ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, struct ext4_new_flex_group_data *flex_gd) { struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_group_desc *gdp; struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *gdb_bh; ext4_group_t group; __u16 *bg_flags = flex_gd->bg_flags; int i, gdb_off, gdb_num, err = 0; for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { group = group_data->group; gdb_off = group % EXT4_DESC_PER_BLOCK(sb); gdb_num = group / EXT4_DESC_PER_BLOCK(sb); /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 
*/ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); err = ext4_set_bitmap_checksums(sb, gdp, group_data); if (err) { ext4_std_error(sb, err); break; } ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, group_data->free_clusters_count); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); if (ext4_has_group_desc_csum(sb)) ext4_itable_unused_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); ext4_group_desc_csum_set(sb, group, gdp); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); break; } /* * We can allocate memory for mb_alloc based on the new group * descriptor */ err = ext4_mb_add_groupinfo(sb, group, gdp); if (err) break; } return err; } static void ext4_add_overhead(struct super_block *sb, const ext4_fsblk_t overhead) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; sbi->s_overhead += overhead; es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); smp_wmb(); } /* * ext4_update_super() updates the super block so that the newly added * groups can be seen by the filesystem. * * @sb: super block * @flex_gd: new added groups */ static void ext4_update_super(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd) { ext4_fsblk_t blocks_count = 0; ext4_fsblk_t free_blocks = 0; ext4_fsblk_t reserved_blocks = 0; struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int i; BUG_ON(flex_gd->count == 0 || group_data == NULL); /* * Make the new blocks and inodes valid next. We do this before * increasing the group count so that once the group is enabled, * all of its blocks and inodes are already valid. * * We always allocate group-by-group, then block-by-block or * inode-by-inode within a group, so enabling these * blocks/inodes before the group is live won't actually let us * allocate the new space yet. */ for (i = 0; i < flex_gd->count; i++) { blocks_count += group_data[i].blocks_count; free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count); } reserved_blocks = ext4_r_blocks_count(es) * 100; reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); lock_buffer(sbi->s_sbh); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); /* * We need to protect s_groups_count against other CPUs seeing * inconsistent state in the superblock. * * The precise rules we use are: * * * Writers must perform a smp_wmb() after updating all * dependent data and before modifying the groups count * * * Readers must perform an smp_rmb() after reading the groups * count and before reading any dependent data. * * NB. 
These rules can be relaxed when checking the group count * while freeing data, as we can only allocate from a block * group after serialising against the group count, and we can * only then free after serialising in turn against that * allocation. */ smp_wmb(); /* Update the global fs size fields */ sbi->s_groups_count += flex_gd->count; sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); /* Update the reserved block counts only once the new group is * active. */ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, EXT4_NUM_B2C(sbi, free_blocks)); percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); ext4_debug("free blocks count %llu", percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; struct flex_groups *fg; flex_group = ext4_flex_group(sbi, group_data[0].group); fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &fg->free_inodes); } /* * Update the fs overhead information. * * For bigalloc, if the superblock already has a properly calculated * overhead, update it with a value based on numbers already computed * above for the newly allocated capacity. */ if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0)) ext4_add_overhead(sb, EXT4_NUM_B2C(sbi, blocks_count - free_blocks)); else ext4_calculate_overhead(sb); es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, blocks_count, free_blocks, reserved_blocks); } /* Add a flex group to an fs. Ensure we handle all possible error conditions * _before_ we start modifying the filesystem, because we cannot abort the * transaction and not have it write the data to disk. */ static int ext4_flex_group_add(struct super_block *sb, struct inode *resize_inode, struct ext4_new_flex_group_data *flex_gd) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; ext4_fsblk_t o_blocks_count; ext4_grpblk_t last; ext4_group_t group; handle_t *handle; unsigned reserved_gdb; int err = 0, err2 = 0, credit; BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); o_blocks_count = ext4_blocks_count(es); ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); BUG_ON(last); err = setup_new_flex_group_blocks(sb, flex_gd); if (err) goto exit; /* * We will always be modifying at least the superblock and GDT * blocks. If we are adding a group past the last current GDT block, * we will also modify the inode and the dindirect block. If we * are adding a group with superblock/GDT backups we will also * modify each of the reserved GDT dindirect blocks. 
*/ credit = 3; /* sb, resize inode, resize inode dindirect */ /* GDT blocks */ credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb)); credit += reserved_gdb; /* Reserved GDT dindirect blocks */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto exit; } BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, EXT4_JTR_NONE); if (err) goto exit_journal; group = flex_gd->groups[0].group; BUG_ON(group != sbi->s_groups_count); err = ext4_add_new_descs(handle, sb, group, resize_inode, flex_gd->count); if (err) goto exit_journal; err = ext4_setup_new_descs(handle, sb, flex_gd); if (err) goto exit_journal; ext4_update_super(sb, flex_gd); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); exit_journal: err2 = ext4_journal_stop(handle); if (!err) err = err2; if (!err) { int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); int meta_bg = ext4_has_feature_meta_bg(sb); sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ext4_group_first_block_no(sb, 0); update_backups(sb, ext4_group_first_block_no(sb, 0), (char *)es, sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); update_backups(sb, gdb_bh->b_blocknr - padding_blocks, gdb_bh->b_data, gdb_bh->b_size, meta_bg); } } exit: return err; } static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, ext4_fsblk_t n_blocks_count, unsigned long flexbg_size) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t o_blocks_count; ext4_group_t n_group; ext4_group_t group; ext4_group_t last_group; ext4_grpblk_t last; ext4_grpblk_t clusters_per_group; unsigned long i; clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb); o_blocks_count = ext4_blocks_count(es); if (o_blocks_count == n_blocks_count) return 0; ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); last_group = group | (flexbg_size - 1); if (last_group > n_group) last_group = n_group; flex_gd->count = last_group - group + 1; for (i = 0; i < flex_gd->count; i++) { int overhead; group_data[i].group = group + i; group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb); overhead = ext4_group_overhead_blocks(sb, group + i); group_data[i].mdata_blocks = overhead; group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb); if (ext4_has_group_desc_csum(sb)) { flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | EXT4_BG_INODE_UNINIT; if (!test_opt(sb, INIT_INODE_TABLE)) flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED; } else flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; } if (last_group == n_group && ext4_has_group_desc_csum(sb)) /* We need to initialize block bitmap of last group. */ flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; if ((last_group == n_group) && (last != clusters_per_group - 1)) { group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1); group_data[i - 1].free_clusters_count -= clusters_per_group - last - 1; } return 1; } /* Add group descriptor data to an existing or new group descriptor block. 
* Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it * write the data to disk. * * If we are on a GDT block boundary, we need to get the reserved GDT block. * Otherwise, we may need to add backup GDT blocks for a sparse group. * * We only need to hold the superblock lock while we are actually adding * in the new group's counts to the superblock. Prior to that we have * not really "added" the group at all. We re-check that we are still * adding in the last group in case things have changed since verifying. */ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) { struct ext4_new_flex_group_data flex_gd; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int reserved_gdb = ext4_bg_has_super(sb, input->group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; struct inode *inode = NULL; int gdb_off; int err; __u16 bg_flags = 0; gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) { ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { ext4_warning(sb, "inodes_count overflow"); return -EINVAL; } if (reserved_gdb || gdb_off == 0) { if (!ext4_has_feature_resize_inode(sb) || !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); } } err = verify_group_input(sb, input); if (err) goto out; err = ext4_alloc_flex_bg_array(sb, input->group + 1); if (err) goto out; err = ext4_mb_alloc_groupinfo(sb, input->group + 1); if (err) goto out; flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; err = ext4_flex_group_add(sb, inode, &flex_gd); out: iput(inode); return err; } /* ext4_group_add */ /* * extend a group without checking assuming that checking has been done. */ static int ext4_group_extend_no_check(struct super_block *sb, ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; handle_t *handle; int err = 0, err2; /* We will update the superblock, one block bitmap, and * one group descriptor via ext4_group_add_blocks(). 
*/ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); ext4_warning(sb, "error %d on journal start", err); return err; } BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh, EXT4_JTR_NONE); if (err) { ext4_warning(sb, "error %d on journal write access", err); goto errout; } lock_buffer(EXT4_SB(sb)->s_sbh); ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); if (err) goto errout; ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); errout: err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; if (!err) { if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); update_backups(sb, ext4_group_first_block_no(sb, 0), (char *)es, sizeof(struct ext4_super_block), 0); } return err; } /* * Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" * for emergencies (because it has no dependencies on reserved blocks). * * If we _really_ wanted, we could use default values to call ext4_group_add() * allow the "remount" trick to work for arbitrary resizing, assuming enough * GDT blocks are reserved to grow to the desired size. */ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_fsblk_t n_blocks_count) { ext4_fsblk_t o_blocks_count; ext4_grpblk_t last; ext4_grpblk_t add; struct buffer_head *bh; ext4_group_t group; o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "extending last group from %llu to %llu blocks", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0; if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { ext4_msg(sb, KERN_ERR, "filesystem too large to resize to %llu blocks safely", n_blocks_count); return -EINVAL; } if (n_blocks_count < o_blocks_count) { ext4_warning(sb, "can't shrink FS - resize aborted"); return -EINVAL; } /* Handle the remaining blocks in the last group only. 
*/ ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); if (last == 0) { ext4_warning(sb, "need to use ext2online to resize further"); return -EPERM; } add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } if (o_blocks_count + add > n_blocks_count) add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) ext4_warning(sb, "will only finish group (%llu blocks, %u new)", o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ bh = ext4_sb_bread(sb, o_blocks_count + add - 1, 0); if (IS_ERR(bh)) { ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); return ext4_group_extend_no_check(sb, o_blocks_count, add); } /* ext4_group_extend */ static int num_desc_blocks(struct super_block *sb, ext4_group_t groups) { return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); } /* * Release the resize inode and drop the resize_inode feature if there * are no more reserved gdt blocks, and then convert the file system * to enable meta_bg */ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) { handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t nr; int i, ret, err = 0; int credits = 1; ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); if (inode) { if (es->s_reserved_gdt_blocks) { ext4_error(sb, "Unexpected non-zero " "s_reserved_gdt_blocks"); return -EPERM; } /* Do a quick sanity check of the resize inode */ if (inode->i_blocks != 1 << (inode->i_blkbits - (9 - sbi->s_cluster_bits))) goto invalid_resize_inode; for (i = 0; i < EXT4_N_BLOCKS; i++) { if (i == EXT4_DIND_BLOCK) { if (ei->i_data[i]) continue; else goto invalid_resize_inode; } if (ei->i_data[i]) goto invalid_resize_inode; } credits += 3; /* block bitmap, bg descriptor, resize inode */ } handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits); if (IS_ERR(handle)) return PTR_ERR(handle); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, EXT4_JTR_NONE); if (err) goto errout; lock_buffer(sbi->s_sbh); ext4_clear_feature_resize_inode(sb); ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); if (err) { ext4_std_error(sb, err); goto errout; } if (inode) { nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]); ext4_free_blocks(handle, inode, NULL, nr, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); ei->i_data[EXT4_DIND_BLOCK] = 0; inode->i_blocks = 0; err = ext4_mark_inode_dirty(handle, inode); if (err) ext4_std_error(sb, err); } errout: ret = ext4_journal_stop(handle); return err ? 
err : ret; invalid_resize_inode: ext4_error(sb, "corrupted/inconsistent resize inode"); return -EINVAL; } /* * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count * * @sb: super block of the fs to be resized * @n_blocks_count: the number of blocks resides in the resized fs */ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) { struct ext4_new_flex_group_data *flex_gd = NULL; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *bh; struct inode *resize_inode = NULL; ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; ext4_group_t o_group; ext4_group_t n_group; ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; unsigned long last_update_time = 0; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; /* See if the device is actually as big as what was requested */ bh = ext4_sb_bread(sb, n_blocks_count - 1, 0); if (IS_ERR(bh)) { ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); /* * For bigalloc, trim the requested size to the nearest cluster * boundary to avoid creating an unusable filesystem. We do this * silently, instead of returning an error, to avoid breaking * callers that blindly resize the filesystem to the full size of * the underlying block device. */ if (ext4_has_feature_bigalloc(sb)) n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1); retry: o_blocks_count = ext4_blocks_count(es); ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu " "to %llu blocks", o_blocks_count, n_blocks_count); if (n_blocks_count < o_blocks_count) { /* On-line shrinking not supported */ ext4_warning(sb, "can't shrink FS - resize aborted"); return -EINVAL; } if (n_blocks_count == o_blocks_count) /* Nothing need to do */ return 0; n_group = ext4_get_group_number(sb, n_blocks_count - 1); if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) { ext4_warning(sb, "resize would cause inodes_count overflow"); return -EINVAL; } ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = num_desc_blocks(sb, n_group + 1); o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); meta_bg = ext4_has_feature_meta_bg(sb); if (ext4_has_feature_resize_inode(sb)) { if (meta_bg) { ext4_error(sb, "resize_inode and meta_bg enabled " "simultaneously"); return -EINVAL; } if (n_desc_blocks > o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks)) { n_blocks_count_retry = n_blocks_count; n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); n_blocks_count = (ext4_fsblk_t)n_group * EXT4_BLOCKS_PER_GROUP(sb) + le32_to_cpu(es->s_first_data_block); n_group--; /* set to last group number */ } if (!resize_inode) resize_inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); } } if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { err = ext4_convert_meta_bg(sb, resize_inode); if (err) goto out; if (resize_inode) { iput(resize_inode); resize_inode = NULL; } if (n_blocks_count_retry) { n_blocks_count = n_blocks_count_retry; n_blocks_count_retry = 0; goto retry; } } /* * Make sure the last group has enough space so that it's * guaranteed to have enough space for all metadata blocks * that it might need to hold. 
(We might not need to store * the inode table blocks in the last block group, but there * will be cases where this might be needed.) */ if ((ext4_group_first_block_no(sb, n_group) + ext4_group_overhead_blocks(sb, n_group) + 2 + sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) { n_blocks_count = ext4_group_first_block_no(sb, n_group); n_group--; n_blocks_count_retry = 0; if (resize_inode) { iput(resize_inode); resize_inode = NULL; } goto retry; } /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; else add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1)); if (add > 0) { err = ext4_group_extend_no_check(sb, o_blocks_count, add); if (err) goto out; } if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out; err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) goto out; flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; goto out; } /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, "resized to %llu blocks", ext4_blocks_count(es)); last_update_time = jiffies; } if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); if (unlikely(err)) break; } if (!err && n_blocks_count_retry) { n_blocks_count = n_blocks_count_retry; n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; if (resize_inode) { iput(resize_inode); resize_inode = NULL; } goto retry; } out: if (flex_gd) free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); if (err) ext4_warning(sb, "error (%d) occurred during " "file system resize", err); ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", ext4_blocks_count(es)); return err; } |
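/*
 * Illustrative sketch (not part of fs/ext4/resize.c): a standalone,
 * userspace-style version of the sparse_super backup-group walk that
 * ext4_list_backups() above performs for verify_reserved_gdb() and
 * update_backups().  Backups live in group 1 and in every group whose
 * number is a power of 3, 5 or 7; the three counters are seeded with
 * 1, 5 and 7, and each call returns the smallest and multiplies it.
 * demo_list_backups() is a hypothetical name used only for this demo,
 * and the sparse_super2 and non-sparse cases are deliberately omitted.
 */
#include <stdio.h>

static unsigned int demo_list_backups(unsigned int *three, unsigned int *five,
				      unsigned int *seven)
{
	unsigned int *min = three;
	int mult = 3;
	unsigned int ret;

	if (*five < *min) {
		min = five;
		mult = 5;
	}
	if (*seven < *min) {
		min = seven;
		mult = 7;
	}
	ret = *min;
	*min *= mult;
	return ret;
}

int main(void)
{
	unsigned int three = 1, five = 5, seven = 7;

	/* Prints 1 3 5 7 9 25 27 49 81 ... matching the comment above. */
	for (int i = 0; i < 10; i++)
		printf("%u ", demo_list_backups(&three, &five, &seven));
	printf("\n");
	return 0;
}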
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * * Changes to meet Linux coding standards, to make it meet latest ccid3 draft * and to make it work as a loadable module in the DCCP stack written by * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. * * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/string.h> #include <linux/slab.h> #include "packet_history.h" #include "../../dccp.h" /* * Transmitter History Routines */ static struct kmem_cache *tfrc_tx_hist_slab; int __init tfrc_tx_packet_history_init(void) { tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist", sizeof(struct tfrc_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL); return tfrc_tx_hist_slab == NULL ?
-ENOBUFS : 0; } void tfrc_tx_packet_history_exit(void) { if (tfrc_tx_hist_slab != NULL) { kmem_cache_destroy(tfrc_tx_hist_slab); tfrc_tx_hist_slab = NULL; } } int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) { struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); if (entry == NULL) return -ENOBUFS; entry->seqno = seqno; entry->stamp = ktime_get_real(); entry->next = *headp; *headp = entry; return 0; } void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) { struct tfrc_tx_hist_entry *head = *headp; while (head != NULL) { struct tfrc_tx_hist_entry *next = head->next; kmem_cache_free(tfrc_tx_hist_slab, head); head = next; } *headp = NULL; } /* * Receiver History Routines */ static struct kmem_cache *tfrc_rx_hist_slab; int __init tfrc_rx_packet_history_init(void) { tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache", sizeof(struct tfrc_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL); return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0; } void tfrc_rx_packet_history_exit(void) { if (tfrc_rx_hist_slab != NULL) { kmem_cache_destroy(tfrc_rx_hist_slab); tfrc_rx_hist_slab = NULL; } } static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, const struct sk_buff *skb, const u64 ndp) { const struct dccp_hdr *dh = dccp_hdr(skb); entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; entry->tfrchrx_ccval = dh->dccph_ccval; entry->tfrchrx_type = dh->dccph_type; entry->tfrchrx_ndp = ndp; entry->tfrchrx_tstamp = ktime_get_real(); } void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp) { struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); tfrc_rx_hist_entry_from_skb(entry, skb, ndp); } /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) { const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq; int i; if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0) return 1; for (i = 1; i <= h->loss_count; i++) if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq) return 1; return 0; } static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { const u8 idx_a = tfrc_rx_hist_index(h, a), idx_b = tfrc_rx_hist_index(h, b); swap(h->ring[idx_a], h->ring[idx_b]); } /* * Private helper functions for loss detection. * * In the descriptions, `Si' refers to the sequence number of entry number i, * whose NDP count is `Ni' (lower case is used for variables). * Note: All __xxx_loss functions expect that a test against duplicates has been * performed already: the seqno of the skb must not be less than the seqno * of loss_prev; and it must not equal that of any valid history entry. 
*/ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = DCCP_SKB_CB(skb)->dccpd_seq; if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ h->loss_count = 1; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); } } static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = DCCP_SKB_CB(skb)->dccpd_seq; if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */ h->loss_count = 2; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); return; } /* S0 < S2 < S1 */ if (dccp_loss_free(s0, s2, n2)) { u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ h->loss_count = 0; h->loss_start = tfrc_rx_hist_index(h, 1); } else /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); } else { /* gap between S0 and S2 */ /* * Reorder history to insert S2 between S0 and S1 */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2); h->loss_count = 2; } } /* return 1 if a new loss event has been identified */ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = DCCP_SKB_CB(skb)->dccpd_seq; if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */ h->loss_count = 3; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); return 1; } /* S3 < S2 */ if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */ /* * Reorder history to insert S3 between S1 and S2 */ tfrc_rx_hist_swap(h, 2, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); h->loss_count = 3; return 1; } /* S0 < S3 < S1 */ if (dccp_loss_free(s0, s3, n3)) { u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; if (dccp_loss_free(s3, s1, n1)) { /* hole between S0 and S1 filled by S3 */ u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 0; } else { /* gap remains between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 1; } } else /* gap exists between S3 and S1, loss_count stays at 2 */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3); return 0; } /* * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3 * Reorder history to insert S3 between S0 and S1. */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3); h->loss_count = 3; return 1; } /* recycle RX history records to continue loss detection if necessary */ static void __three_after_loss(struct tfrc_rx_hist *h) { /* * At this stage we know already that there is a gap between S0 and S1 * (since S0 was the highest sequence number received before detecting * the loss). To recycle the loss record, it is thus only necessary to * check for other possible gaps between S1/S2 and between S2/S3. 
*/ u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno; u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; if (dccp_loss_free(s1, s2, n2)) { if (dccp_loss_free(s2, s3, n3)) { /* no gap between S2 and S3: entire hole is filled */ h->loss_start = tfrc_rx_hist_index(h, 3); h->loss_count = 0; } else { /* gap between S2 and S3 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 1; } } else { /* gap between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 2; } } /** * tfrc_rx_handle_loss - Loss detection and further processing * @h: The non-empty RX history object * @lh: Loss Intervals database to update * @skb: Currently received packet * @ndp: The NDP count belonging to @skb * @calc_first_li: Caller-dependent computation of first loss interval in @lh * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) * * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. * Since it also takes care of reordering during loss detection and updates the * records accordingly, the caller should not perform any more RX history * operations when loss_count is greater than 0 after calling this function. */ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, struct sk_buff *skb, const u64 ndp, u32 (*calc_first_li)(struct sock *), struct sock *sk) { int is_new_loss = 0; if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { DCCP_BUG("invalid loss_count %d", h->loss_count); } else if (__two_after_loss(h, skb, ndp)) { /* * Update Loss Interval database and recycle RX records */ is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); __three_after_loss(h); } return is_new_loss; } int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { int i; for (i = 0; i <= TFRC_NDUPACK; i++) { h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); if (h->ring[i] == NULL) goto out_free; } h->loss_count = h->loss_start = 0; return 0; out_free: while (i-- != 0) { kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); h->ring[i] = NULL; } return -ENOBUFS; } void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { int i; for (i = 0; i <= TFRC_NDUPACK; ++i) if (h->ring[i] != NULL) { kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); h->ring[i] = NULL; } } /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) { return h->ring[0]; } /** * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) { return h->ring[h->rtt_sample_prev]; } /** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal * @h: receive histogram * @skb: packet containing timestamp. * * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able * to compute a sample with given data - calling function should check this. 
*/ u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { u32 sample = 0, delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ if (h->rtt_sample_prev == 2) { /* previous candidate stored */ sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); if (sample) sample = 4 / sample * ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); else /* * FIXME: This condition is in principle not * possible but occurs when CCID is used for * two-way data traffic. I have tried to trace * it, but the cause does not seem to be here. */ DCCP_BUG("please report to dccp@vger.kernel.org" " => prev = %u, last = %u", tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); } else if (delta_v < 1) { h->rtt_sample_prev = 1; goto keep_ref_for_next_time; } } else if (delta_v == 4) /* optimal match */ sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); else { /* suboptimal match */ h->rtt_sample_prev = 2; goto keep_ref_for_next_time; } if (unlikely(sample > DCCP_SANE_RTT_MAX)) { DCCP_WARN("RTT sample %u too large, using max\n", sample); sample = DCCP_SANE_RTT_MAX; } h->rtt_sample_prev = 0; /* use current entry as next reference */ keep_ref_for_next_time: return sample; } |
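/*
 * Illustrative userspace sketch (not part of the kernel sources above) of
 * the coarse RTT estimate that tfrc_rx_hist_sample_rtt() derives from the
 * CCVal window counter, following RFC 4342, 8.1: the sender advances CCVal
 * roughly every quarter RTT, so a time gap measured over a CCVal delta of
 * 1..4 is scaled up to a full RTT by the factor 4/delta (integer math, as
 * in the kernel code).  The helper names below are assumptions made for
 * this sketch only.
 */
#include <stdint.h>
#include <stdio.h>

/* 4-bit window-counter difference, analogous to the kernel's SUB16() */
static uint8_t ccval_delta(uint8_t newer, uint8_t older)
{
	return (newer - older) & 0xf;
}

/* elapsed_us: time between arrival of the two packets carrying the CCVals */
static uint32_t rtt_from_ccval(uint8_t ccval_new, uint8_t ccval_old,
			       uint32_t elapsed_us)
{
	uint8_t delta = ccval_delta(ccval_new, ccval_old);

	if (delta < 1 || delta > 4)	/* unsuitable delta, no sample */
		return 0;
	return 4 / delta * elapsed_us;	/* scale partial-RTT gap to one RTT */
}

int main(void)
{
	/* counter advanced by 2 over 12.5 ms => RTT estimate of ~25 ms */
	printf("%u us\n", (unsigned)rtt_from_ccval(6, 4, 12500));
	return 0;
}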
// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 tunneling device * Linux INET6 implementation * * Authors: * Ville Nuorvala <vnuorval@tcs.hut.fi> * Yasuyuki Kozakai <kozakai@linux-ipv6.org> * * Based on: * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c * * RFC 2473 */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/sockios.h> #include <linux/icmp.h> #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/route.h> #include <linux/rtnetlink.h> #include <linux/netfilter_ipv6.h> #include <linux/slab.h> #include <linux/hash.h> #include <linux/etherdevice.h> #include <linux/uaccess.h> #include <linux/atomic.h> #include <net/icmp.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/ip6_tunnel.h> #include <net/xfrm.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/dst_metadata.h> MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); #define IP6_TUNNEL_HASH_SIZE_SHIFT 5 #define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static struct rtnl_link_ops ip6_link_ops __read_mostly; static unsigned int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; struct ip6_tnl __rcu *collect_md_tun; }; static inline int ip6_tnl_mpls_supported(void) { return IS_ENABLED(CONFIG_MPLS); } #define for_each_ip6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @net: network namespace * @link: ifindex of underlying interface * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point * * Return: * tunnel matching given end-points if found, * else fallback tunnel if its device is up, * else %NULL **/ static struct ip6_tnl * ip6_tnl_lookup(struct net *net, int link, const struct in6_addr *remote, const struct in6_addr *local) { unsigned int hash = HASH(remote, local);
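	/*
	 * Lookup order, most to least specific: the bucket for the exact
	 * (remote, local) pair, preferring an entry whose parms.link matches
	 * the ingress ifindex and keeping an address match on another link
	 * as a candidate; then the wildcard-remote and wildcard-local
	 * buckets; finally the candidate, the collect_md tunnel and the
	 * fallback device, in each case only while the device is up.
	 */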
struct ip6_tnl *t, *cand = NULL; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else cand = t; } memset(&any, 0, sizeof(any)); hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_any(&t->parms.raddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else if (!cand) cand = t; } hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || !ipv6_addr_any(&t->parms.laddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else if (!cand) cand = t; } if (cand) return cand; t = rcu_dereference(ip6n->collect_md_tun); if (t && t->dev->flags & IFF_UP) return t; t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } /** * ip6_tnl_bucket - get head of list matching given tunnel parameters * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: * ip6_tnl_bucket() returns the head of the list matching the * &struct in6_addr entries laddr and raddr in @p. * * Return: head of IPv6 tunnel list **/ static struct ip6_tnl __rcu ** ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; unsigned int h = 0; int prio = 0; if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } /** * ip6_tnl_link - add tunnel to hash table * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be added **/ static void ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); if (t->parms.collect_md) rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } /** * ip6_tnl_unlink - remove tunnel from hash table * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be removed **/ static void ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; if (t->parms.collect_md) rcu_assign_pointer(ip6n->collect_md_tun, NULL); for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); } static int ip6_tnl_create2(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); ip6_tnl_link(ip6n, t); return 0; out: return err; } /** * ip6_tnl_create - create a new tunnel * @net: network namespace * @p: tunnel parameters * * Description: * Create tunnel matching given parameters. 
* * Return: * created tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; int err = -E2BIG; if (p->name[0]) { if (!dev_valid_name(p->name)) goto failed; strscpy(name, p->name, IFNAMSIZ); } else { sprintf(name, "ip6tnl%%d"); } err = -ENOMEM; dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!dev) goto failed; dev_net_set(dev, net); t = netdev_priv(dev); t->parms = *p; t->net = dev_net(dev); err = ip6_tnl_create2(dev); if (err < 0) goto failed_free; return t; failed_free: free_netdev(dev); failed: return ERR_PTR(err); } /** * ip6_tnl_locate - find or create tunnel matching given parameters * @net: network namespace * @p: tunnel parameters * @create: != 0 if allowed to create new tunnel if no match found * * Description: * ip6_tnl_locate() first tries to locate an existing tunnel * based on @parms. If this is unsuccessful, but @create is set a new * tunnel device is created and registered for use. * * Return: * matching tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; struct ip6_tnl __rcu **tp; struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); for (tp = ip6_tnl_bucket(ip6n, p); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && p->link == t->parms.link) { if (create) return ERR_PTR(-EEXIST); return t; } } if (!create) return ERR_PTR(-ENODEV); return ip6_tnl_create(net, p); } /** * ip6_tnl_dev_uninit - tunnel device uninitializer * @dev: the device to be destroyed * * Description: * ip6_tnl_dev_uninit() removes tunnel from its list **/ static void ip6_tnl_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); dst_cache_reset(&t->dst_cache); netdev_put(dev, &t->dev_tracker); } /** * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option * @skb: received socket buffer * @raw: the ICMPv6 error message data * * Return: * 0 if none was found, * else index to encapsulation limit **/ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) break; optlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { optlen = ipv6_authlen(hdr); } else { optlen = ipv6_optlen(hdr); } /* cache hdr->nexthdr, since pskb_may_pull() might * invalidate hdr */ next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { u16 i = 2; /* Remember : hdr is no longer valid at this point. 
*/ if (!pskb_may_pull(skb, off + optlen)) break; while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ if (i + sizeof(*tel) > optlen) break; tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; else i++; } } nexthdr = next; off += optlen; } return 0; } EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /* ip6_tnl_err() should handle errors in the tunnel according to the * specifications in RFC 2473. */ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; struct net *net = dev_net(skb->dev); u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; __u32 rel_info = 0; struct ip6_tnl *t; int err = -ENOENT; int rel_msg = 0; u8 tproto; __u16 len; /* If the packet doesn't contain the original IPv6 header we are in trouble since we might need the source address for further processing of the error. */ rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr); if (!t) goto out; tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto out; err = 0; switch (*type) { case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); rel_msg = 1; break; case ICMPV6_TIME_EXCEED: if ((*code) == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); rel_msg = 1; } break; case ICMPV6_PARAMPROB: { struct ipv6_tlv_tnl_enc_lim *tel; __u32 teli; teli = 0; if ((*code) == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", t->parms.name); rel_msg = 1; } } else { net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } break; } case ICMPV6_PKT_TOOBIG: { __u32 mtu; ip6_update_pmtu(skb, net, htonl(*info), 0, 0, sock_net_uid(net, NULL)); mtu = *info - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; rel_msg = 1; } break; } case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); break; } *type = rel_type; *code = rel_code; *info = rel_info; *msg = rel_msg; out: rcu_read_unlock(); return err; } static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); const struct iphdr *eiph; struct sk_buff *skb2; int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; struct rtable *rt; struct flowi4 fl4; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); if (err < 0) return err; if (rel_msg == 0) return 0; switch (rel_type) { case ICMPV6_DEST_UNREACH: if (rel_code != ICMPV6_ADDR_UNREACH) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; break; case ICMPV6_PKT_TOOBIG: if (rel_code != 0) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; default: return 0; } if 
(!pskb_may_pull(skb, offset + sizeof(struct iphdr))) return 0; skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return 0; skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); /* Try to guess incoming interface */ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; ip_rt_put(rt); /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; } skb_dst_set(skb2, &rt->dst); } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } /* change mtu on this route */ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { if (rel_info > dst_mtu(skb_dst(skb2))) goto out; skb_dst_update_pmtu_no_confirm(skb2, rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); out: kfree_skb(skb2); return 0; } static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); if (err < 0) return err; if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { struct rt6_info *rt; struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return 0; skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); /* Try to guess incoming interface */ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, skb2, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; icmpv6_send(skb2, rel_type, rel_code, rel_info); ip6_rt_put(rt); kfree_skb(skb2); } return 0; } static int mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); return err; } static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); return IP6_ECN_decapsulate(ipv6h, skb); } static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); return IP6_ECN_decapsulate(ipv6h, skb); } static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { /* ECN is not supported in AF_MPLS */ return 0; } __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { flags = IP6_TNL_F_CAP_PER_PACKET; } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && 
(!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { if (ltype&IPV6_ADDR_UNICAST) flags |= IP6_TNL_F_CAP_XMIT; if (rtype&IPV6_ADDR_UNICAST) flags |= IP6_TNL_F_CAP_RCV; } return flags; } EXPORT_SYMBOL(ip6_tnl_get_cap); /* called with rcu_read_lock() */ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = t->net; if ((p->flags & IP6_TNL_F_CAP_RCV) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { struct net_device *ldev = NULL; if (p->link) ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(laddr) || likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) && ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE)))) ret = 1; } return ret; } EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb), bool log_ecn_err) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); int err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) { DEV_STATS_INC(tunnel->dev, rx_crc_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } if (tunnel->parms.i_flags & TUNNEL_SEQ) { if (!(tpi->flags & TUNNEL_SEQ) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { DEV_STATS_INC(tunnel->dev, rx_fifo_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } tunnel->i_seqno = ntohl(tpi->seq) + 1; } skb->protocol = tpi->proto; /* Warning: All skb pointers will be invalidated! 
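	 * (In the Ethernet case below, pskb_may_pull() may reallocate the
	 * packet data, which is why ipv6h is re-read via ipv6_hdr(skb)
	 * before the DSCP/ECN handling.)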
*/ if (tunnel->dev->type == ARPHRD_ETHER) { if (!pskb_may_pull(skb, ETH_HLEN)) { DEV_STATS_INC(tunnel->dev, rx_length_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; skb_reset_mac_header(skb); } skb_reset_network_header(skb); memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); err = dscp_ecn_decapsulate(tunnel, ipv6h, skb); if (unlikely(err)) { if (log_ecn_err) net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n", &ipv6h->saddr, ipv6_get_dsfield(ipv6h)); if (err > 1) { DEV_STATS_INC(tunnel->dev, rx_frame_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } } dev_sw_netstats_rx_add(tunnel->dev, skb->len); skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); if (tun_dst) skb_dst_set(skb, (struct dst_entry *)tun_dst); gro_cells_receive(&tunnel->gro_cells, skb); return 0; drop: if (tun_dst) dst_release((struct dst_entry *)tun_dst); kfree_skb(skb); return 0; } int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_err) { int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb); dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate; if (tpi->proto == htons(ETH_P_IP)) dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate; return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); static const struct tnl_ptk_info tpi_v6 = { /* no tunnel info required for ipxip6. */ .proto = htons(ETH_P_IPV6), }; static const struct tnl_ptk_info tpi_v4 = { /* no tunnel info required for ipxip6. */ .proto = htons(ETH_P_IP), }; static const struct tnl_ptk_info tpi_mpls = { /* no tunnel info required for mplsip6. 
*/ .proto = htons(ETH_P_MPLS_UC), }; static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, const struct tnl_ptk_info *tpi, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb)) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct metadata_dst *tun_dst = NULL; int ret = -1; rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr); if (t) { u8 tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; ipv6h = ipv6_hdr(skb); if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; if (t->parms.collect_md) { tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) goto drop; } ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); } rcu_read_unlock(); return ret; drop: rcu_read_unlock(); kfree_skb(skb); return 0; } static int ip4ip6_rcv(struct sk_buff *skb) { return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4, ip4ip6_dscp_ecn_decapsulate); } static int ip6ip6_rcv(struct sk_buff *skb) { return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6, ip6ip6_dscp_ecn_decapsulate); } static int mplsip6_rcv(struct sk_buff *skb) { return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls, mplsip6_dscp_ecn_decapsulate); } struct ipv6_tel_txoption { struct ipv6_txoptions ops; __u8 dst_opt[8]; }; static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) { memset(opt, 0, sizeof(struct ipv6_tel_txoption)); opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; opt->dst_opt[3] = 1; opt->dst_opt[4] = encap_limit; opt->dst_opt[5] = IPV6_TLV_PADN; opt->dst_opt[6] = 1; opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt; opt->ops.opt_nflen = 8; } /** * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device * @hdr: IPv6 header from the incoming packet * * Description: * Avoid trivial tunneling loop by checking that tunnel exit-point * doesn't match source of incoming packet. * * Return: * 1 if conflict, * 0 else **/ static inline bool ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) { return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = t->net; if (t->parms.collect_md) return 1; if ((p->flags & IP6_TNL_F_CAP_XMIT) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { struct net_device *ldev = NULL; rcu_read_lock(); if (p->link) ldev = dev_get_by_index_rcu(net, p->link); if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) pr_warn_ratelimited("%s xmit: Routing loop! 
Remote address found on this node!\n", p->name); else ret = 1; rcu_read_unlock(); } return ret; } EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); /** * ip6_tnl_xmit - encapsulate packet and send * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device * @dsfield: dscp code for outer header * @fl6: flow of tunneled packet * @encap_limit: encapsulation limit * @pmtu: Path MTU is stored if packet is too big * @proto: next header value * * Description: * Build new header and do some sanity checks on the packet before sending * it. * * Return: * 0 on success * -1 fail * %-EMSGSIZE message too big. return mtu in this case. **/ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi6 *fl6, int encap_limit, __u32 *pmtu, __u8 proto) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct ipv6hdr *ipv6h; struct ipv6_tel_txoption opt; struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; __be16 payload_protocol; bool use_cache = false; u8 hop_limit; int err = -1; payload_protocol = skb_protocol(skb, true); if (t->parms.collect_md) { hop_limit = skb_tunnel_info(skb)->key.ttl; goto route_lookup; } else { hop_limit = t->parms.hop_limit; } /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { if (payload_protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; struct neighbour *neigh; int addr_type; if (!skb_dst(skb)) goto tx_err_link_failure; neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); if (!neigh) goto tx_err_link_failure; addr6 = (struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) addr6 = &ipv6_hdr(skb)->daddr; memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (payload_protocol == htons(ETH_P_IP)) { const struct rtable *rt = skb_rtable(skb); if (!rt) goto tx_err_link_failure; if (rt->rt_gw_family == AF_INET6) memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr)); } } else if (t->parms.proto != 0 && !(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { /* enable the cache only if neither the outer protocol nor the * routing decision depends on the current inner header value */ use_cache = true; } if (use_cache) dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) goto tx_err_link_failure; if (!dst) { route_lookup: /* add dsfield to flowlabel for route lookup */ fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel); dst = ip6_route_output(net, NULL, fl6); if (dst->error) goto tx_err_link_failure; dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto tx_err_link_failure; } if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, &fl6->daddr, 0, &fl6->saddr)) goto tx_err_link_failure; ndst = dst; } tdev = dst->dev; if (tdev == dev) { DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", t->parms.name); goto tx_err_dst_release; } mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; } mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? 
IPV6_MIN_MTU : IPV4_MIN_MTU); skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; } if (t->err_count > 0) { if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) { t->err_count--; dst_link_failure(skb); } else { t->err_count = 0; } } skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom += LL_RESERVED_SPACE(tdev); if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) goto tx_err_dst_release; if (skb->sk) skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } if (t->parms.collect_md) { if (t->encap.type != TUNNEL_ENCAP_NONE) goto tx_err_dst_release; } else { if (use_cache && ndst) dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); } skb_dst_set(skb, dst); if (hop_limit == 0) { if (payload_protocol == htons(ETH_P_IP)) hop_limit = ip_hdr(skb)->ttl; else if (payload_protocol == htons(ETH_P_IPV6)) hop_limit = ipv6_hdr(skb)->hop_limit; else hop_limit = ip6_dst_hoplimit(dst); } /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; if (max_headroom > READ_ONCE(dev->needed_headroom)) WRITE_ONCE(dev->needed_headroom, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) return err; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_frag_opts(skb, &opt.ops, &proto); } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, dsfield, ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; ip6tunnel_xmit(NULL, skb, dev); return 0; tx_err_link_failure: DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); return err; } EXPORT_SYMBOL(ip6_tnl_xmit); static inline int ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, u8 protocol) { struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h; const struct iphdr *iph; int encap_limit = -1; __u16 offset; struct flowi6 fl6; __u8 dsfield, orig_dsfield; __u32 mtu; u8 tproto; int err; tproto = READ_ONCE(t->parms.proto); if (tproto != protocol && tproto != 0) return -1; if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || ip_tunnel_info_af(tun_info) != AF_INET6)) return -1; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = protocol; fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; switch (protocol) { case IPPROTO_IPIP: iph = ip_hdr(skb); orig_dsfield = ipv4_get_dsfield(iph); break; case IPPROTO_IPV6: ipv6h = ipv6_hdr(skb); orig_dsfield = ipv6_get_dsfield(ipv6h); break; default: orig_dsfield = dsfield; break; } } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; if (protocol == IPPROTO_IPV6) { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have * reallocated skb->head */ if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; 
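			/*
			 * Per RFC 2473: an inner Tunnel Encapsulation Limit of 0
			 * means the packet must not be tunneled again, so an
			 * ICMPv6 Parameter Problem pointing at the limit octet
			 * is sent; otherwise the decremented limit is carried in
			 * the outer packet's destination options header (see
			 * init_tel_txopt() above).
			 */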
tel = (void *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; } } memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = protocol; if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else fl6.flowi6_mark = t->parms.fwmark; switch (protocol) { case IPPROTO_IPIP: iph = ip_hdr(skb); orig_dsfield = ipv4_get_dsfield(iph); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) dsfield = orig_dsfield; else dsfield = ip6_tclass(t->parms.flowinfo); break; case IPPROTO_IPV6: ipv6h = ipv6_hdr(skb); orig_dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) dsfield = orig_dsfield; else dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); break; default: orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo); break; } } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; skb_set_inner_ipproto(skb, protocol); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, protocol); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) switch (protocol) { case IPPROTO_IPIP: icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); break; case IPPROTO_IPV6: icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); break; default: break; } return -1; } return 0; } static netdev_tx_t ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); u8 ipproto; int ret; if (!pskb_inet_may_pull(skb)) goto tx_err; switch (skb->protocol) { case htons(ETH_P_IP): ipproto = IPPROTO_IPIP; break; case htons(ETH_P_IPV6): if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; ipproto = IPPROTO_IPV6; break; case htons(ETH_P_MPLS_UC): ipproto = IPPROTO_MPLS; break; default: goto tx_err; } ret = ipxip6_tnl_xmit(skb, dev, ipproto); if (ret < 0) goto tx_err; return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct net_device *tdev = NULL; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; int t_hlen; int mtu; __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ fl6->saddr = p->laddr; fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; t->tun_hlen = 0; t->hlen = t->encap_hlen + t->tun_hlen; t_hlen = t->hlen + sizeof(struct ipv6hdr); if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, NULL, strict); if (rt) { tdev = rt->dst.dev; ip6_rt_put(rt); } if (!tdev && p->link) tdev = 
__dev_get_by_index(t->net, p->link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); mtu = mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) mtu -= 8; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); } } } /** * ip6_tnl_change - update the tunnel parameters * @t: tunnel to be changed * @p: tunnel configuration parameters * * Description: * ip6_tnl_change() updates the tunnel parameters **/ static void ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; t->parms.flags = p->flags; t->parms.hop_limit = p->hop_limit; t->parms.encap_limit = p->encap_limit; t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); } static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); ip6_tnl_unlink(ip6n, t); synchronize_net(); ip6_tnl_change(t, p); ip6_tnl_link(ip6n, t); netdev_state_change(t->dev); } static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { /* for default tnl0 device allow to change only the proto */ t->parms.proto = p->proto; netdev_state_change(t->dev); } static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { p->laddr = u->laddr; p->raddr = u->raddr; p->flags = u->flags; p->hop_limit = u->hop_limit; p->encap_limit = u->encap_limit; p->flowinfo = u->flowinfo; p->link = u->link; p->proto = u->proto; memcpy(p->name, u->name, sizeof(u->name)); } static void ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) { u->laddr = p->laddr; u->raddr = p->raddr; u->flags = p->flags; u->hop_limit = p->hop_limit; u->encap_limit = p->encap_limit; u->flowinfo = p->flowinfo; u->link = p->link; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); } /** * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel * @ifr: unused * @data: parameters passed from userspace * @cmd: command to be performed * * Description: * ip6_tnl_ioctl() is used for managing IPv6 tunnels * from userspace. * * The possible commands are the following: * %SIOCGETTUNNEL: get tunnel parameters for device * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters * %SIOCCHGTUNNEL: change tunnel parameters to those given * %SIOCDELTUNNEL: delete tunnel * * The fallback device "ip6tnl0", created during module * initialization, can be used for creating other tunnel devices. 
* * Return: * 0 on success, * %-EFAULT if unable to copy data to or from userspace, * %-EPERM if current process hasn't %CAP_NET_ADMIN set * %-EINVAL if passed tunnel parameters are invalid, * %-EEXIST if changing a tunnel's parameters would cause a conflict * %-ENODEV if attempting to change or delete a nonexisting device **/ static int ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm p; struct __ip6_tnl_parm p1; struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); memset(&p1, 0, sizeof(p1)); switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); if (IS_ERR(t)) t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && p.proto != 0) break; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (cmd == SIOCCHGTUNNEL) { if (!IS_ERR(t)) { if (t->dev != dev) { err = -EEXIST; break; } } else t = netdev_priv(dev); if (dev == ip6n->fb_tnl_dev) ip6_tnl0_update(t, &p1); else ip6_tnl_update(t, &p1); } if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else { err = PTR_ERR(t); } break; case SIOCDELTUNNEL: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); if (IS_ERR(t)) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) break; dev = t->dev; } err = 0; unregister_netdevice(dev); break; default: err = -EINVAL; } return err; } /** * ip6_tnl_change_mtu - change mtu manually for tunnel device * @dev: virtual device associated with tunnel * @new_mtu: the new mtu * * Return: * 0 on success, * %-EINVAL if mtu too small **/ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); if (tnl->parms.proto == IPPROTO_IPV6) { if (new_mtu < IPV6_MIN_MTU) return -EINVAL; } else { if (new_mtu < ETH_MIN_MTU) return -EINVAL; } if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) return -EINVAL; } else { if (new_mtu > IP_MAX_MTU - dev->hard_header_len) return -EINVAL; } dev->mtu = new_mtu; return 0; } EXPORT_SYMBOL(ip6_tnl_change_mtu); int ip6_tnl_get_iflink(const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); return t->parms.link; } EXPORT_SYMBOL(ip6_tnl_get_iflink); int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, unsigned int num) { if (num >= MAX_IPTUN_ENCAP_OPS) return -ERANGE; return !cmpxchg((const struct ip6_tnl_encap_ops **) &ip6tun_encaps[num], NULL, ops) ? 0 : -1; } EXPORT_SYMBOL(ip6_tnl_encap_add_ops); int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, unsigned int num) { int ret; if (num >= MAX_IPTUN_ENCAP_OPS) return -ERANGE; ret = (cmpxchg((const struct ip6_tnl_encap_ops **) &ip6tun_encaps[num], ops, NULL) == ops) ? 
0 : -1; synchronize_net(); return ret; } EXPORT_SYMBOL(ip6_tnl_encap_del_ops); int ip6_tnl_encap_setup(struct ip6_tnl *t, struct ip_tunnel_encap *ipencap) { int hlen; memset(&t->encap, 0, sizeof(t->encap)); hlen = ip6_encap_hlen(ipencap); if (hlen < 0) return hlen; t->encap.type = ipencap->type; t->encap.sport = ipencap->sport; t->encap.dport = ipencap->dport; t->encap.flags = ipencap->flags; t->encap_hlen = hlen; t->hlen = t->encap_hlen + t->tun_hlen; return 0; } EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_start_xmit, .ndo_siocdevprivate = ip6_tnl_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; #define IPXIPX_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) /** * ip6_tnl_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel * * Description: * Initialize function pointers and device parameters **/ static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); dev->features |= IPXIPX_FEATURES; dev->hw_features |= IPXIPX_FEATURES; /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); } /** * ip6_tnl_dev_init_gen - general initializer for all tunnel devices * @dev: virtual device associated with tunnel **/ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int ret; int t_hlen; t->dev = dev; t->net = dev_net(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) goto free_stats; ret = gro_cells_init(&t->gro_cells, dev); if (ret) goto destroy_dst; t->tun_hlen = 0; t->hlen = t->encap_hlen + t->tun_hlen; t_hlen = t->hlen + sizeof(struct ipv6hdr); dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; destroy_dst: dst_cache_destroy(&t->dst_cache); free_stats: free_percpu(dev->tstats); dev->tstats = NULL; return ret; } /** * ip6_tnl_dev_init - initializer for all non fallback tunnel devices * @dev: virtual device associated with tunnel **/ static int ip6_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int err = ip6_tnl_dev_init_gen(dev); if (err) return err; ip6_tnl_link_config(t); if (t->parms.collect_md) netif_keep_dst(dev); return 0; } /** * ip6_fb_tnl_dev_init - initializer for fallback tunnel device * @dev: fallback device * * Return: 0 **/ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); t->parms.proto = IPPROTO_IPV6; rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr 
*data[], struct netlink_ext_ack *extack) { u8 proto; if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); if (proto != IPPROTO_IPV6 && proto != IPPROTO_IPIP && proto != 0) return -EINVAL; return 0; } static void ip6_tnl_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { memset(parms, 0, sizeof(*parms)); if (!data) return; if (data[IFLA_IPTUN_LINK]) parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); if (data[IFLA_IPTUN_ENCAP_LIMIT]) parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); if (data[IFLA_IPTUN_FLOWINFO]) parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]); if (data[IFLA_IPTUN_FLAGS]) parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); if (data[IFLA_IPTUN_COLLECT_METADATA]) parms->collect_md = true; if (data[IFLA_IPTUN_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip_tunnel_encap ipencap; struct ip6_tnl *nt, *t; int err; nt = netdev_priv(dev); if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return err; } ip6_tnl_netlink_parms(data, &nt->parms); if (nt->parms.collect_md) { if (rtnl_dereference(ip6n->collect_md_tun)) return -EEXIST; } else { t = ip6_tnl_locate(net, &nt->parms, 0); if (!IS_ERR(t)) return -EEXIST; } err = ip6_tnl_create2(dev); if (!err && tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); return err; } static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip_tunnel_encap ipencap; if (dev == ip6n->fb_tnl_dev) return -EINVAL; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(t, &ipencap); if (err < 0) return err; } ip6_tnl_netlink_parms(data, &p); if (p.collect_md) return -EINVAL; t = ip6_tnl_locate(net, &p, 0); if (!IS_ERR(t)) { if (t->dev != dev) return -EEXIST; } else t = netdev_priv(dev); ip6_tnl_update(t, &p); return 0; } static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) { struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev != ip6n->fb_tnl_dev) unregister_netdevice_queue(dev, head); } static size_t ip6_tnl_get_size(const struct net_device *dev) { return /* IFLA_IPTUN_LINK */ nla_total_size(4) + /* IFLA_IPTUN_LOCAL */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_IPTUN_REMOTE */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_IPTUN_TTL */ nla_total_size(1) + /* IFLA_IPTUN_ENCAP_LIMIT */ nla_total_size(1) + /* IFLA_IPTUN_FLOWINFO */ nla_total_size(4) + /* IFLA_IPTUN_FLAGS */ nla_total_size(4) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + /* IFLA_IPTUN_ENCAP_TYPE */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_FLAGS */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_SPORT */ nla_total_size(2) + /* 
IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + /* IFLA_IPTUN_COLLECT_METADATA */ nla_total_size(0) + /* IFLA_IPTUN_FWMARK */ nla_total_size(4) + 0; } static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) || nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; if (parm->collect_md) if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } struct net *ip6_tnl_get_link_net(const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); return tunnel->net; } EXPORT_SYMBOL(ip6_tnl_get_link_net); static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { .kind = "ip6tnl", .maxtype = IFLA_IPTUN_MAX, .policy = ip6_tnl_policy, .priv_size = sizeof(struct ip6_tnl), .setup = ip6_tnl_dev_setup, .validate = ip6_tnl_validate, .newlink = ip6_tnl_newlink, .changelink = ip6_tnl_changelink, .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, .priority = 1, }; static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .handler = ip6ip6_rcv, .err_handler = ip6ip6_err, .priority = 1, }; static struct xfrm6_tunnel mplsip6_handler __read_mostly = { .handler = mplsip6_rcv, .err_handler = mplsip6_err, .priority = 1, }; static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; int h; struct ip6_tnl *t; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6_link_ops) unregister_netdevice_queue(dev, list); for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. 
*/ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } t = rtnl_dereference(ip6n->tnls_wc[0]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } static int __net_init ip6_tnl_init_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip6_tnl *t = NULL; int err; ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; if (!net_has_fallback_tunnels(net)) return 0; err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) goto err_register; err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; t = netdev_priv(ip6n->fb_tnl_dev); strcpy(t->parms.name, ip6n->fb_tnl_dev->name); return 0; err_register: free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) { struct net *net; LIST_HEAD(list); rtnl_lock(); list_for_each_entry(net, net_list, exit_list) ip6_tnl_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; /** * ip6_tunnel_init - register protocol and reserve needed resources * * Return: 0 on success **/ static int __init ip6_tunnel_init(void) { int err; if (!ipv6_mod_enabled()) return -EOPNOTSUPP; err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto out_pernet; err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); if (err < 0) { pr_err("%s: can't register ip4ip6\n", __func__); goto out_ip4ip6; } err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); if (err < 0) { pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } if (ip6_tnl_mpls_supported()) { err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS); if (err < 0) { pr_err("%s: can't register mplsip6\n", __func__); goto out_mplsip6; } } err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; return 0; rtnl_link_failed: if (ip6_tnl_mpls_supported()) xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: unregister_pernet_device(&ip6_tnl_net_ops); out_pernet: return err; } /** * ip6_tunnel_cleanup - free resources and unregister protocol **/ static void __exit ip6_tunnel_cleanup(void) { rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); if (ip6_tnl_mpls_supported() && xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } module_init(ip6_tunnel_init); module_exit(ip6_tunnel_cleanup); |
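/*
 * Example (not part of the kernel sources): a minimal, self-contained sketch
 * of the header/MTU arithmetic done in ip6_tnl_dev_init_gen() above.  The
 * tunnel MTU starts from ETH_DATA_LEN, loses the outer IPv6 header plus any
 * extra encapsulation headers, and loses 8 more bytes when the destination
 * option carrying the tunnel encapsulation limit is inserted.  The constants
 * are assumptions restated here so the sketch compiles on its own.
 */
#include <stdbool.h>
#include <stdio.h>

#define EX_ETH_DATA_LEN    1500	/* assumed: matches ETH_DATA_LEN */
#define EX_IPV6_HDR_LEN    40	/* assumed: sizeof(struct ipv6hdr) */
#define EX_ENCAP_LIMIT_OPT 8	/* destination option with the encap limit */

static int ex_ip6tnl_mtu(int encap_hlen, int tun_hlen, bool ignore_encap_limit)
{
	int t_hlen = encap_hlen + tun_hlen + EX_IPV6_HDR_LEN;
	int mtu = EX_ETH_DATA_LEN - t_hlen;

	if (!ignore_encap_limit)
		mtu -= EX_ENCAP_LIMIT_OPT;
	return mtu;
}

int main(void)
{
	/* plain ip6ip6/ip4ip6 tunnel: no extra encapsulation headers */
	printf("default ip6tnl MTU:   %d\n", ex_ip6tnl_mtu(0, 0, false)); /* 1452 */
	printf("with encaplimit none: %d\n", ex_ip6tnl_mtu(0, 0, true));  /* 1460 */
	return 0;
}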
// SPDX-License-Identifier: GPL-2.0-only
/*
 * QNX4 file system, Linux implementation.
 *
 * Version : 0.2.1
 *
 * Using parts of the xiafs filesystem.
 *
 * History :
 *
 * 01-06-1998 by Richard Frowijn : first release.
 * 20-06-1998 by Frank Denis : Linux 2.1.99+ support, boot signature, misc.
 * 30-06-1998 by Frank Denis : first step to write inodes.
*/ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/statfs.h> #include "qnx4.h" #define QNX4_VERSION 4 #define QNX4_BMNAME ".bitmap" static const struct super_operations qnx4_sops; static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_free_inode(struct inode *inode); static int qnx4_remount(struct super_block *sb, int *flags, char *data); static int qnx4_statfs(struct dentry *, struct kstatfs *); static const struct super_operations qnx4_sops = { .alloc_inode = qnx4_alloc_inode, .free_inode = qnx4_free_inode, .statfs = qnx4_statfs, .remount_fs = qnx4_remount, }; static int qnx4_remount(struct super_block *sb, int *flags, char *data) { struct qnx4_sb_info *qs; sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; *flags |= SB_RDONLY; return 0; } static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) { unsigned long phys; QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock)); phys = qnx4_block_map( inode, iblock ); if ( phys ) { // logical block is before EOF map_bh(bh, inode->i_sb, phys); } return 0; } static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset) { u32 size = le32_to_cpu(extent->xtnt_size); if (*offset < size) return le32_to_cpu(extent->xtnt_blk) + *offset - 1; *offset -= size; return 0; } unsigned long qnx4_block_map( struct inode *inode, long iblock ) { int ix; long i_xblk; struct buffer_head *bh = NULL; struct qnx4_xblk *xblk = NULL; struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); u32 offset = iblock; u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset); if (block) { // iblock is in the first extent. This is easy. } else { // iblock is beyond first extent. We have to follow the extent chain. i_xblk = le32_to_cpu(qnx4_inode->di_xblk); ix = 0; while ( --nxtnt > 0 ) { if ( ix == 0 ) { // read next xtnt block. bh = sb_bread(inode->i_sb, i_xblk - 1); if ( !bh ) { QNX4DEBUG((KERN_ERR "qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1)); return -EIO; } xblk = (struct qnx4_xblk*)bh->b_data; if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) { QNX4DEBUG((KERN_ERR "qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk)); return -EIO; } } block = try_extent(&xblk->xblk_xtnts[ix], &offset); if (block) { // got it! break; } if ( ++ix >= xblk->xblk_num_xtnts ) { i_xblk = le32_to_cpu(xblk->xblk_next_xblk); ix = 0; brelse( bh ); bh = NULL; } } if ( bh ) brelse( bh ); } QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block)); return block; } static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = sb->s_magic; buf->f_bsize = sb->s_blocksize; buf->f_blocks = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size) * 8; buf->f_bfree = qnx4_count_free_blocks(sb); buf->f_bavail = buf->f_bfree; buf->f_namelen = QNX4_NAME_MAX; buf->f_fsid = u64_to_fsid(id); return 0; } /* * Check the root directory of the filesystem to make sure * it really _is_ a qnx4 filesystem, and to check the size * of the directory entry. 
*/ static const char *qnx4_checkroot(struct super_block *sb, struct qnx4_super_block *s) { struct buffer_head *bh; struct qnx4_inode_entry *rootdir; int rd, rl; int i, j; if (s->RootDir.di_fname[0] != '/' || s->RootDir.di_fname[1] != '\0') return "no qnx4 filesystem (no root dir)."; QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id)); rd = le32_to_cpu(s->RootDir.di_first_xtnt.xtnt_blk) - 1; rl = le32_to_cpu(s->RootDir.di_first_xtnt.xtnt_size); for (j = 0; j < rl; j++) { bh = sb_bread(sb, rd + j); /* root dir, first block */ if (bh == NULL) return "unable to read root entry."; rootdir = (struct qnx4_inode_entry *) bh->b_data; for (i = 0; i < QNX4_INODES_PER_BLOCK; i++, rootdir++) { QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname)); if (strcmp(rootdir->di_fname, QNX4_BMNAME) != 0) continue; qnx4_sb(sb)->BitMap = kmemdup(rootdir, sizeof(struct qnx4_inode_entry), GFP_KERNEL); brelse(bh); if (!qnx4_sb(sb)->BitMap) return "not enough memory for bitmap inode"; /* keep bitmap inode known */ return NULL; } brelse(bh); } return "bitmap file not found."; } static int qnx4_fill_super(struct super_block *s, void *data, int silent) { struct buffer_head *bh; struct inode *root; const char *errmsg; struct qnx4_sb_info *qs; qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); if (!qs) return -ENOMEM; s->s_fs_info = qs; sb_set_blocksize(s, QNX4_BLOCK_SIZE); s->s_op = &qnx4_sops; s->s_magic = QNX4_SUPER_MAGIC; s->s_flags |= SB_RDONLY; /* Yup, read-only yet */ s->s_time_min = 0; s->s_time_max = U32_MAX; /* Check the superblock signature. Since the qnx4 code is dangerous, we should leave as quickly as possible if we don't belong here... */ bh = sb_bread(s, 1); if (!bh) { printk(KERN_ERR "qnx4: unable to read the superblock\n"); return -EINVAL; } /* check before allocating dentries, inodes, .. */ errmsg = qnx4_checkroot(s, (struct qnx4_super_block *) bh->b_data); brelse(bh); if (errmsg != NULL) { if (!silent) printk(KERN_ERR "qnx4: %s\n", errmsg); return -EINVAL; } /* does root not have inode number QNX4_ROOT_INO ?? 
*/ root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); if (IS_ERR(root)) { printk(KERN_ERR "qnx4: get inode failed\n"); return PTR_ERR(root); } s->s_root = d_make_root(root); if (s->s_root == NULL) return -ENOMEM; return 0; } static void qnx4_kill_sb(struct super_block *sb) { struct qnx4_sb_info *qs = qnx4_sb(sb); kill_block_super(sb); if (qs) { kfree(qs->BitMap); kfree(qs); } } static int qnx4_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, qnx4_get_block); } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,qnx4_get_block); } static const struct address_space_operations qnx4_aops = { .read_folio = qnx4_read_folio, .bmap = qnx4_bmap }; struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; struct qnx4_inode_entry *raw_inode; int block; struct qnx4_inode_entry *qnx4_inode; struct inode *inode; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; qnx4_inode = qnx4_raw_inode(inode); inode->i_mode = 0; QNX4DEBUG((KERN_INFO "reading inode : [%d]\n", ino)); if (!ino) { printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " "out of range\n", sb->s_id, ino); iget_failed(inode); return ERR_PTR(-EIO); } block = ino / QNX4_INODES_PER_BLOCK; if (!(bh = sb_bread(sb, block))) { printk(KERN_ERR "qnx4: major problem: unable to read inode from dev " "%s\n", sb->s_id); iget_failed(inode); return ERR_PTR(-EIO); } raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + (ino % QNX4_INODES_PER_BLOCK); inode->i_mode = le16_to_cpu(raw_inode->di_mode); i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid)); i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid)); set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); inode->i_size = le32_to_cpu(raw_inode->di_size); inode_set_mtime(inode, le32_to_cpu(raw_inode->di_mtime), 0); inode_set_atime(inode, le32_to_cpu(raw_inode->di_atime), 0); inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0); inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &qnx4_dir_inode_operations; inode->i_fop = &qnx4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; } else { printk(KERN_ERR "qnx4: bad inode %lu on dev %s\n", ino, sb->s_id); iget_failed(inode); brelse(bh); return ERR_PTR(-EIO); } brelse(bh); unlock_new_inode(inode); return inode; } static struct kmem_cache *qnx4_inode_cachep; static struct inode *qnx4_alloc_inode(struct super_block *sb) { struct qnx4_inode_info *ei; ei = alloc_inode_sb(sb, qnx4_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void qnx4_free_inode(struct inode *inode) { kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) { qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", sizeof(struct qnx4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); if (qnx4_inode_cachep == NULL) return -ENOMEM; return 0; } 
static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } static struct dentry *qnx4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); } static struct file_system_type qnx4_fs_type = { .owner = THIS_MODULE, .name = "qnx4", .mount = qnx4_mount, .kill_sb = qnx4_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("qnx4"); static int __init init_qnx4_fs(void) { int err; err = init_inodecache(); if (err) return err; err = register_filesystem(&qnx4_fs_type); if (err) { destroy_inodecache(); return err; } printk(KERN_INFO "QNX4 filesystem 0.2.3 registered.\n"); return 0; } static void __exit exit_qnx4_fs(void) { unregister_filesystem(&qnx4_fs_type); destroy_inodecache(); } module_init(init_qnx4_fs) module_exit(exit_qnx4_fs) MODULE_LICENSE("GPL"); |
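/*
 * Example (not part of the kernel sources): the inode-number arithmetic used
 * by qnx4_iget() above.  A qnx4 inode number encodes the on-disk position of
 * its directory entry, so locating it is a divide/modulo by the number of
 * entries per block.  The sizes are assumptions (QNX4_BLOCK_SIZE = 512,
 * QNX4_DIR_ENTRY_SIZE = 64, hence QNX4_INODES_PER_BLOCK = 8) restated so the
 * sketch stands alone.
 */
#include <stdio.h>

#define EX_QNX4_BLOCK_SIZE       512
#define EX_QNX4_DIR_ENTRY_SIZE   64
#define EX_QNX4_INODES_PER_BLOCK (EX_QNX4_BLOCK_SIZE / EX_QNX4_DIR_ENTRY_SIZE)

int main(void)
{
	unsigned long ino = 0x41;	/* arbitrary example inode number */
	unsigned long block = ino / EX_QNX4_INODES_PER_BLOCK;
	unsigned long slot  = ino % EX_QNX4_INODES_PER_BLOCK;

	printf("inode %lu lives in block %lu, entry %lu (byte offset %lu)\n",
	       ino, block, slot, slot * EX_QNX4_DIR_ENTRY_SIZE);
	return 0;
}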
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2019 HUAWEI, Inc. * https://www.huawei.com/ */ #ifndef __EROFS_FS_COMPRESS_H #define __EROFS_FS_COMPRESS_H #include "internal.h" struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; /* indicate the algorithm will be used for decompression */ unsigned int alg; bool inplace_io, partial_decoding, fillgaps; }; struct z_erofs_decompressor { int (*config)(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); char *name; }; /* some special page->private (unsigned long, see below) */ #define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) #define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) /* * For all pages in a pcluster, page->private should be one of * Type Last 2bits page->private * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE * cached/managed page 00 pointer to z_erofs_pcluster * online page (file-backed, 01/10/11 sub-index << 2 | count * some pages can be used for inplace I/O) * * page->mapping should be one of * Type page->mapping * short-lived page NULL * preallocated page NULL * cached/managed page non-NULL or NULL (invalidated/truncated page) * online page non-NULL * * For all managed pages, PG_private should be set with 1 extra refcount, * which is used for page reclaim / migration. */ /* * short-lived pages are pages directly from buddy system with specific * page->private (no need to set PagePrivate since these are non-LRU / * non-movable pages and bypass reclaim / migration code). */ static inline bool z_erofs_is_shortlived_page(struct page *page) { if (page->private != Z_EROFS_SHORTLIVED_PAGE) return false; DBG_BUGON(page->mapping); return true; } static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, struct page *page) { if (!z_erofs_is_shortlived_page(page)) return false; /* short-lived pages should not be used by others at the same time */ if (page_ref_count(page) > 1) { put_page(page); } else { /* follow the pcluster rule above. */ erofs_pagepool_add(pagepool, page); } return true; } #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, struct page *page) { return page->mapping == MNGD_MAPPING(sbi); } int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); extern const struct z_erofs_decompressor erofs_decompressors[]; /* prototypes for specific algorithms */ int z_erofs_load_lzma_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size); int z_erofs_load_deflate_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size); int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); #endif |
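/*
 * Illustration only (not erofs code): the page->private convention described
 * above keeps a small value in the low two bits of an unsigned long and the
 * payload above them.  Short-lived, preallocated and cached pages use 00 in
 * the low bits (with the negative sentinels -1UL << 2 and -2UL << 2 telling
 * the first two apart), while online pages store "sub-index << 2 | count"
 * with a non-zero 2-bit count.  The helper names below are hypothetical.
 */
#include <stdio.h>

#define EX_LOW_BITS	2
#define EX_LOW_MASK	((1UL << EX_LOW_BITS) - 1)
#define EX_SHORTLIVED	(-1UL << EX_LOW_BITS)	/* like Z_EROFS_SHORTLIVED_PAGE */
#define EX_PREALLOCATED	(-2UL << EX_LOW_BITS)	/* like Z_EROFS_PREALLOCATED_PAGE */

static unsigned long ex_pack_online(unsigned long sub_index, unsigned int count)
{
	/* online pages carry their sub-index above a small 2-bit count */
	return (sub_index << EX_LOW_BITS) | (count & EX_LOW_MASK);
}

int main(void)
{
	unsigned long v = ex_pack_online(5, 2);

	printf("count=%lu sub-index=%lu\n", v & EX_LOW_MASK, v >> EX_LOW_BITS);
	printf("short-lived sentinel:  %#lx\n", (unsigned long)EX_SHORTLIVED);
	printf("preallocated sentinel: %#lx\n", (unsigned long)EX_PREALLOCATED);
	return 0;
}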
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2010-2011 EIA Electronics,
//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
// Copyright (c) 2010-2011 EIA Electronics,
//                         Pieter Beyens <pieter.beyens@eia.be>
// Copyright (c) 2017-2019 Pengutronix,
//                         Marc Kleine-Budde <kernel@pengutronix.de>
// Copyright (c) 2017-2019 Pengutronix,
//                         Oleksij Rempel <kernel@pengutronix.de>

/* J1939 Address Claiming.
 * Address Claiming in the kernel
 * - keeps track of the AC states of ECU's,
 * - resolves NAME<=>SA taking into account the AC states of ECU's.
 *
 * All Address Claim msgs (including host-originated msg) are processed
 * at the receive path (a sent msg is always received again via CAN echo).
 * As such, the processing of AC msgs is done in the order on which msgs
 * are sent on the bus.
 *
 * This module doesn't send msgs itself (e.g. replies on Address Claims),
 * this is the responsibility of a user space application or daemon.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/netdevice.h> #include <linux/skbuff.h> #include "j1939-priv.h" static inline name_t j1939_skb_to_name(const struct sk_buff *skb) { return le64_to_cpup((__le64 *)skb->data); } static inline bool j1939_ac_msg_is_request(struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); int req_pgn; if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST) return false; req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16); return req_pgn == J1939_PGN_ADDRESS_CLAIMED; } static int j1939_ac_verify_outgoing(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); if (skb->len != 8) { netdev_notice(priv->ndev, "tx address claim with dlc %i\n", skb->len); return -EPROTO; } if (skcb->addr.src_name != j1939_skb_to_name(skb)) { netdev_notice(priv->ndev, "tx address claim with different name\n"); return -EPROTO; } if (skcb->addr.sa == J1939_NO_ADDR) { netdev_notice(priv->ndev, "tx address claim with broadcast sa\n"); return -EPROTO; } /* ac must always be a broadcast */ if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) { netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n"); return -EPROTO; } return 0; } int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); int ret; u8 addr; /* network mgmt: address claiming msgs */ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { struct j1939_ecu *ecu; ret = j1939_ac_verify_outgoing(priv, skb); /* return both when failure & when successful */ if (ret < 0) return ret; ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name); if (!ecu) return -ENODEV; if (ecu->addr != skcb->addr.sa) /* hold further traffic for ecu, remove from parent */ j1939_ecu_unmap(ecu); j1939_ecu_put(ecu); } else if (skcb->addr.src_name) { /* assign source address */ addr = j1939_name_to_addr(priv, skcb->addr.src_name); if (!j1939_address_is_unicast(addr) && !j1939_ac_msg_is_request(skb)) { netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n", skcb->addr.src_name); return -EADDRNOTAVAIL; } skcb->addr.sa = addr; } /* assign destination address */ if (skcb->addr.dst_name) { addr = j1939_name_to_addr(priv, skcb->addr.dst_name); if (!j1939_address_is_unicast(addr)) { netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n", skcb->addr.dst_name); return -EADDRNOTAVAIL; } skcb->addr.da = addr; } return 0; } static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_ecu *ecu, *prev; name_t name; if (skb->len != 8) { netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n", skb->len); return; } name = j1939_skb_to_name(skb); skcb->addr.src_name = name; if (!name) { netdev_notice(priv->ndev, "rx address claim without name\n"); return; } if (!j1939_address_is_valid(skcb->addr.sa)) { netdev_notice(priv->ndev, "rx address claim with broadcast sa\n"); return; } write_lock_bh(&priv->lock); /* Few words on the ECU ref counting: * * First we get an ECU handle, either with * j1939_ecu_get_by_name_locked() (increments the ref counter) * or j1939_ecu_create_locked() (initializes an ECU object * with a ref counter of 1). * * j1939_ecu_unmap_locked() will decrement the ref counter, * but only if the ECU was mapped before. So "ecu" still * belongs to us. 
* * j1939_ecu_timer_start() will increment the ref counter * before it starts the timer, so we can put the ecu when * leaving this function. */ ecu = j1939_ecu_get_by_name_locked(priv, name); if (ecu && ecu->addr == skcb->addr.sa) { /* The ISO 11783-5 standard, in "4.5.2 - Address claim * requirements", states: * d) No CF shall begin, or resume, transmission on the * network until 250 ms after it has successfully claimed * an address except when responding to a request for * address-claimed. * * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim * prioritization" show that the CF begins the transmission * after 250 ms from the first AC (address-claimed) message * even if it sends another AC message during that time window * to resolve the address contention with another CF. * * As stated in "4.4.2.3 - Address-claimed message": * In order to successfully claim an address, the CF sending * an address claimed message shall not receive a contending * claim from another CF for at least 250 ms. * * As stated in "4.4.3.2 - NAME management (NM) message": * 1) A commanding CF can * d) request that a CF with a specified NAME transmit * the address-claimed message with its current NAME. * 2) A target CF shall * d) send an address-claimed message in response to a * request for a matching NAME * * Taking the above arguments into account, the 250 ms wait is * requested only during network initialization. * * Do not restart the timer on AC message if both the NAME and * the address match and so if the address has already been * claimed (timer has expired) or the AC message has been sent * to resolve the contention with another CF (timer is still * running). */ goto out_ecu_put; } if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) ecu = j1939_ecu_create_locked(priv, name); if (IS_ERR_OR_NULL(ecu)) goto out_unlock_bh; /* cancel pending (previous) address claim */ j1939_ecu_timer_cancel(ecu); if (j1939_address_is_idle(skcb->addr.sa)) { j1939_ecu_unmap_locked(ecu); goto out_ecu_put; } /* save new addr */ if (ecu->addr != skcb->addr.sa) j1939_ecu_unmap_locked(ecu); ecu->addr = skcb->addr.sa; prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa); if (prev) { if (ecu->name > prev->name) { j1939_ecu_unmap_locked(ecu); j1939_ecu_put(prev); goto out_ecu_put; } else { /* kick prev if less or equal */ j1939_ecu_unmap_locked(prev); j1939_ecu_put(prev); } } j1939_ecu_timer_start(ecu); out_ecu_put: j1939_ecu_put(ecu); out_unlock_bh: write_unlock_bh(&priv->lock); } void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_ecu *ecu; /* network mgmt */ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { j1939_ac_process(priv, skb); } else if (j1939_address_is_unicast(skcb->addr.sa)) { /* assign source name */ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa); if (ecu) { skcb->addr.src_name = ecu->name; j1939_ecu_put(ecu); } } /* assign destination name */ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da); if (ecu) { skcb->addr.dst_name = ecu->name; j1939_ecu_put(ecu); } } |
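/*
 * Userspace sketch (not kernel code): transmitting an Address Claimed message
 * with the CAN_J1939 socket API, which is the kind of frame the receive path
 * above processes.  Based on the uapi in <linux/can.h> and
 * <linux/can/j1939.h>; "can0", the NAME and the claimed source address are
 * placeholders, error handling is trimmed, and the canonical usage is in
 * Documentation/networking/j1939.rst.
 */
#include <endian.h>
#include <net/if.h>
#include <stdint.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/can.h>
#include <linux/can/j1939.h>

static int ex_j1939_claim_address(const char *ifname, uint64_t name, uint8_t sa)
{
	struct sockaddr_can baddr = {0}, daddr = {0};
	uint64_t data = htole64(name);	/* payload is the 64-bit NAME, LE */
	int sock;

	sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
	if (sock < 0)
		return -1;

	/* bind with our NAME and the source address we want to claim */
	baddr.can_family = AF_CAN;
	baddr.can_ifindex = if_nametoindex(ifname);
	baddr.can_addr.j1939.name = name;
	baddr.can_addr.j1939.addr = sa;
	baddr.can_addr.j1939.pgn = J1939_NO_PGN;
	if (bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)) < 0)
		goto err;

	/* address claims are always broadcast */
	daddr.can_family = AF_CAN;
	daddr.can_addr.j1939.name = J1939_NO_NAME;
	daddr.can_addr.j1939.addr = J1939_NO_ADDR;
	daddr.can_addr.j1939.pgn = J1939_PGN_ADDRESS_CLAIMED;
	if (sendto(sock, &data, sizeof(data), 0,
		   (struct sockaddr *)&daddr, sizeof(daddr)) < 0)
		goto err;
	return sock;
err:
	close(sock);
	return -1;
}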
2 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 | /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/module.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_socket.h> #include <net/inet_sock.h> #include <net/tcp.h> struct nft_socket { enum nft_socket_keys key:8; u8 level; u8 len; union { u8 dreg; }; }; static void nft_socket_wildcard(const struct nft_pktinfo *pkt, struct nft_regs *regs, struct sock *sk, u32 *dest) { switch (nft_pf(pkt)) { case NFPROTO_IPV4: nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); break; #endif default: regs->verdict.code = NFT_BREAK; return; } } #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { struct cgroup *cgrp; u64 cgid; if (!sk_fullsock(sk)) return false; cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level); if (!cgrp) return false; cgid = cgroup_id(cgrp); memcpy(dest, &cgid, sizeof(u64)); return true; } #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) { const struct net_device *indev = nft_in(pkt); const struct sk_buff *skb = pkt->skb; struct sock *sk = NULL; if (!indev) return NULL; switch (nft_pf(pkt)) { case NFPROTO_IPV4: sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); break; #endif default: WARN_ON_ONCE(1); break; } return sk; } static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_socket *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; struct sock *sk = skb->sk; u32 *dest = ®s->data[priv->dreg]; if (sk && !net_eq(nft_net(pkt), sock_net(sk))) sk = NULL; if (!sk) sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; return; } switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; return; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; return; } nft_socket_wildcard(pkt, regs, sk, dest); break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: if 
(!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; return; } break; #endif default: WARN_ON(1); regs->verdict.code = NFT_BREAK; } if (sk != skb->sk) sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_socket_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_socket *priv = nft_expr_priv(expr); unsigned int len; if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY]) return -EINVAL; switch(ctx->family) { case NFPROTO_IPV4: #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: #endif case NFPROTO_INET: break; default: return -EOPNOTSUPP; } priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: len = sizeof(u32); break; #ifdef CONFIG_CGROUPS case NFT_SOCKET_CGROUPV2: { unsigned int level; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL])); if (level > 255) return -EOPNOTSUPP; priv->level = level; len = sizeof(u64); break; } #endif default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_socket *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key))) return -1; if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) return -1; return 0; } static bool nft_socket_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_socket *priv = nft_expr_priv(expr); const struct nft_socket *socket; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } socket = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != socket->key || priv->dreg != socket->dreg || priv->level != socket->level) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT)); } static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)), .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { .name = "socket", .ops = &nft_socket_ops, .policy = nft_socket_policy, .maxattr = NFTA_SOCKET_MAX, .owner = THIS_MODULE, }; static int __init nft_socket_module_init(void) { return nft_register_expr(&nft_socket_type); } static void __exit nft_socket_module_exit(void) { nft_unregister_expr(&nft_socket_type); } module_init(nft_socket_module_init); module_exit(nft_socket_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Máté Eckl"); 
MODULE_DESCRIPTION("nf_tables socket match module");
MODULE_ALIAS_NFT_EXPR("socket");
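/*
 * Userspace sketch (not kernel code): the "socket transparent" key handled by
 * nft_socket_eval() above reports whether the matching socket has
 * IP_TRANSPARENT set, as a transparent proxy would do when creating its
 * listener.  Setting IP_TRANSPARENT typically requires CAP_NET_ADMIN; error
 * handling is trimmed.
 */
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int ex_make_transparent_listener(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	/* mark the socket transparent; an nft rule such as
	 * "socket transparent 1 accept" would then match packets owned by it */
	if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}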
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>

void tcp_fastopen_init_key_once(struct net *net)
{
	u8 key[TCP_FASTOPEN_KEY_LENGTH];
	struct tcp_fastopen_context *ctxt;

	rcu_read_lock();
	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctxt) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	/* tcp_fastopen_reset_cipher publishes the new context
	 * atomically, so we allow this race happening here.
	 *
	 * All call sites of tcp_fastopen_cookie_gen also check
	 * for a valid cookie, so this is an acceptable risk.
*/ get_random_bytes(key, sizeof(key)); tcp_fastopen_reset_cipher(net, NULL, key, NULL); } static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = container_of(head, struct tcp_fastopen_context, rcu); kfree_sensitive(ctx); } void tcp_fastopen_destroy_cipher(struct sock *sk) { struct tcp_fastopen_context *ctx; ctx = rcu_dereference_protected( inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1); if (ctx) call_rcu(&ctx->rcu, tcp_fastopen_ctx_free); } void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); } int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key) { struct tcp_fastopen_context *ctx, *octx; struct fastopen_queue *q; int err = 0; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; goto out; } ctx->key[0].key[0] = get_unaligned_le64(primary_key); ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8); if (backup_key) { ctx->key[1].key[0] = get_unaligned_le64(backup_key); ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8); ctx->num = 2; } else { ctx->num = 1; } if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); } else { octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); } if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); out: return err; } int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, u64 *key) { struct tcp_fastopen_context *ctx; int n_keys = 0, i; rcu_read_lock(); if (icsk) ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); else ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctx) { n_keys = tcp_fastopen_context_len(ctx); for (i = 0; i < n_keys; i++) { put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); } } rcu_read_unlock(); return n_keys; } static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, const siphash_key_t *key, struct tcp_fastopen_cookie *foc) { BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); if (req->rsk_ops->family == AF_INET) { const struct iphdr *iph = ip_hdr(syn); foc->val[0] = cpu_to_le64(siphash(&iph->saddr, sizeof(iph->saddr) + sizeof(iph->daddr), key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } #if IS_ENABLED(CONFIG_IPV6) if (req->rsk_ops->family == AF_INET6) { const struct ipv6hdr *ip6h = ipv6_hdr(syn); foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr, sizeof(ip6h->saddr) + sizeof(ip6h->daddr), key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } #endif return false; } /* Generate the fastopen cookie by applying SipHash to both the source and * destination addresses. */ static void tcp_fastopen_cookie_gen(struct sock *sk, struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) { struct tcp_fastopen_context *ctx; rcu_read_lock(); ctx = tcp_fastopen_get_ctx(sk); if (ctx) __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc); rcu_read_unlock(); } /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, * queue this additional data / FIN. 
*/ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) return; skb = skb_clone(skb, GFP_ATOMIC); if (!skb) return; skb_dst_drop(skb); /* segs_in has been initialized to 1 in tcp_create_openreq_child(). * Hence, reset segs_in to 0 before calling tcp_segs_in() * to avoid double counting. Also, tcp_segs_in() expects * skb->len to include the tcp_hdrlen. Hence, it should * be called before __skb_pull(). */ tp->segs_in = 0; tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; __skb_queue_tail(&sk->sk_receive_queue, skb); tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, * as we certainly are not changing upper 32bit value (0) */ tp->bytes_received = skb->len; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); } /* returns 0 - no key match, 1 for primary, 2 for backup */ static int tcp_fastopen_cookie_gen_check(struct sock *sk, struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *orig, struct tcp_fastopen_cookie *valid_foc) { struct tcp_fastopen_cookie search_foc = { .len = -1 }; struct tcp_fastopen_cookie *foc = valid_foc; struct tcp_fastopen_context *ctx; int i, ret = 0; rcu_read_lock(); ctx = tcp_fastopen_get_ctx(sk); if (!ctx) goto out; for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc); if (tcp_fastopen_cookie_match(foc, orig)) { ret = i + 1; goto out; } foc = &search_foc; } out: rcu_read_unlock(); return ret; } static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; bool own_req; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, NULL, &own_req); if (!child) return NULL; spin_lock(&queue->fastopenq.lock); queue->fastopenq.qlen++; spin_unlock(&queue->fastopenq.lock); /* Initialize the child socket. Have to fix some values to take * into account the child is a Fast Open socket and is created * only out of the bits carried in the SYN packet. */ tp = tcp_sk(child); rcu_assign_pointer(tp->fastopen_rsk, req); tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash * because it's been added to the accept queue directly. */ req->timeout = tcp_timeout_init(child); inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, req->timeout, TCP_RTO_MAX); refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_fastopen_add_skb(child, skb); tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. 
*/ return child; } static bool tcp_fastopen_queue_check(struct sock *sk) { struct fastopen_queue *fastopenq; int max_qlen; /* Make sure the listener has enabled fastopen, and we don't * exceed the max # of pending TFO requests allowed before trying * to validating the cookie in order to avoid burning CPU cycles * unnecessarily. * * XXX (TFO) - The implication of checking the max_qlen before * processing a cookie request is that clients can't differentiate * between qlen overflow causing Fast Open to be disabled * temporarily vs a server not supporting Fast Open at all. */ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; max_qlen = READ_ONCE(fastopenq->max_qlen); if (max_qlen == 0) return false; if (fastopenq->qlen >= max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); spin_unlock(&fastopenq->lock); return false; } fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); reqsk_put(req1); } return true; } static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). */ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); if (!((tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return NULL; } if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len == 0) { /* Client requests a cookie. */ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc); } else if (foc->len > 0) { ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc, &valid_foc); if (!ret) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else { /* Cookie is valid. Create a (full) child socket to * accept the data in SYN before returning a SYN-ACK to * ack the data. If we fail to create the socket, fall * back and ack the ISN only but includes the same * cookie. * * Note: Data-less SYN with valid cookie is allowed to * send data in SYN_RECV state. 
*/ fastopen: child = tcp_fastopen_create_child(sk, skb, req); if (child) { if (ret == 2) { valid_foc.exp = foc->exp; *foc = valid_foc; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEALTKEY); } else { foc->len = -1; } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); return child; } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } } valid_foc.exp = foc->exp; *foc = valid_foc; return NULL; } bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { const struct dst_entry *dst; tcp_fastopen_cache_get(sk, mss, cookie); /* Firewall blackhole issue check */ if (tcp_fastopen_active_should_disable(sk)) { cookie->len = -1; return false; } dst = __sk_dst_get(sk); if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) { cookie->len = -1; return true; } if (cookie->len > 0) return true; tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE; return false; } /* This function checks if we want to defer sending SYN until the first * write(). We defer under the following conditions: * 1. fastopen_connect sockopt is set * 2. we have a valid cookie * Return value: return true if we want to defer until application writes data * return false if we want to send out SYN immediately */ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) { struct tcp_fastopen_cookie cookie = { .len = 0 }; struct tcp_sock *tp = tcp_sk(sk); u16 mss; if (tp->fastopen_connect && !tp->fastopen_req) { if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { inet_set_bit(DEFER_CONNECT, sk); return true; } /* Alloc fastopen_req in order for FO option to be included * in SYN */ tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), sk->sk_allocation); if (tp->fastopen_req) tp->fastopen_req->cookie = cookie; else *err = -ENOBUFS; } return false; } EXPORT_SYMBOL(tcp_fastopen_defer_connect); /* * The following code block is to deal with middle box issues with TFO: * Middlebox firewall issues can potentially cause server's data being * blackholed after a successful 3WHS using TFO. * The proposed solution is to disable active TFO globally under the * following circumstances: * 1. client side TFO socket receives out of order FIN * 2. client side TFO socket receives out of order RST * 3. client side TFO socket has timed out three times consecutively during * or after handshake * We disable active side TFO globally for 1hr at first. Then if it * happens again, we disable it for 2h, then 4h, 8h, ... * And we reset the timeout back to 1hr when we see a successful active * TFO connection with data exchanges. */ /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). * We want net->ipv4.tfo_active_disable_stamp to be updated first. 
*/ smp_mb__before_atomic(); atomic_inc(&net->ipv4.tfo_active_disable_times); NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Calculate timeout for tfo active disable * Return true if we are still in the active TFO disable period * Return false if timeout already expired and we should use active TFO */ bool tcp_fastopen_active_should_disable(struct sock *sk) { unsigned int tfo_bh_timeout = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; if (!tfo_bh_timeout) return false; tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); if (!tfo_da_times) return false; /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ smp_rmb(); /* Limit timeout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + multiplier * tfo_bh_timeout * HZ; if (time_before(jiffies, timeout)) return true; /* Mark check bit so we can check for successful active TFO * condition and reset tfo_active_disable_times */ tcp_sk(sk)->syn_fastopen_ch = 1; return false; } /* Disable active TFO if FIN is the only packet in the ofo queue * and no data is received. * Also check if we can reset tfo_active_disable_times if data is * received successfully on a marked active TFO sockets opened on * a non-loopback interface */ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst; struct sk_buff *skb; if (!tp->syn_fastopen) return; if (!tp->data_segs_in) { skb = skb_rb_first(&tp->out_of_order_queue); if (skb && !skb_rb_next(skb)) { if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_fastopen_active_disable(sk); return; } } } else if (tp->syn_fastopen_ch && atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } } void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired) { u32 timeouts = inet_csk(sk)->icsk_retransmits; struct tcp_sock *tp = tcp_sk(sk); /* Broken middle-boxes may black-hole Fast Open connection during or * even after the handshake. Be extremely conservative and pause * Fast Open globally after hitting the third consecutive timeout or * exceeding the configured timeout limit. */ if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) && (timeouts == 2 || (timeouts < 2 && expired))) { tcp_fastopen_active_disable(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } } |
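/*
 * Userspace sketch (not kernel code): how the server-side path above is
 * exercised.  The TCP_FASTOPEN socket option sets the per-listener queue
 * length checked in tcp_fastopen_queue_check(), and a client can carry data
 * in the SYN with sendto(MSG_FASTOPEN) (or by enabling TCP_FASTOPEN_CONNECT
 * before connect(), which drives the fastopen_connect path in
 * tcp_fastopen_defer_connect()).  Addresses and sizes are placeholders and
 * error handling is trimmed.
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void ex_tfo_server(int lfd)
{
	int qlen = 16;	/* max pending TFO requests, cf. fastopenq->max_qlen */

	setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
	listen(lfd, 128);
}

static void ex_tfo_client(int fd, const struct sockaddr_in *srv)
{
	static const char req[] = "hello";

	/* data rides on the SYN; this falls back to a regular handshake
	 * (cookie request) if no valid cookie is cached yet */
	sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
	       (const struct sockaddr *)srv, sizeof(*srv));
}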
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org>
 * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de>
 * Copyright (c) 2008-2012 Novell Inc.
 * Copyright (c) 2012-2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 * Copyright (c) 2012-2019 Linux Foundation
 *
 * Core driver model functions and structures that should not be
 * shared outside of the drivers/base/ directory.
 *
 */
#include <linux/notifier.h>

/**
 * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure.
 *
 * @subsys - the struct kset that defines this subsystem
 * @devices_kset - the subsystem's 'devices' directory
 * @interfaces - list of subsystem interfaces associated
 * @mutex - protect the devices, and interfaces lists.
 *
 * @drivers_kset - the list of drivers associated
 * @klist_devices - the klist to iterate over the @devices_kset
 * @klist_drivers - the klist to iterate over the @drivers_kset
 * @bus_notifier - the bus notifier list for anything that cares about things
 *                 on this bus.
 * @bus - pointer back to the struct bus_type that this structure is associated
 *        with.
 * @dev_root: Default device to use as the parent.
 *
 * @glue_dirs - "glue" directory to put in-between the parent device to
 *              avoid namespace conflicts
 * @class - pointer back to the struct class that this structure is associated
 *          with.
 * @lock_key: Lock class key for use by the lock validator
 *
 * This structure is the one that is the actual kobject allowing struct
 * bus_type/class to be statically allocated safely. Nothing outside of the
 * driver core should ever touch these fields.
*/ struct subsys_private { struct kset subsys; struct kset *devices_kset; struct list_head interfaces; struct mutex mutex; struct kset *drivers_kset; struct klist klist_devices; struct klist klist_drivers; struct blocking_notifier_head bus_notifier; unsigned int drivers_autoprobe:1; const struct bus_type *bus; struct device *dev_root; struct kset glue_dirs; const struct class *class; struct lock_class_key lock_key; }; #define to_subsys_private(obj) container_of_const(obj, struct subsys_private, subsys.kobj) static inline struct subsys_private *subsys_get(struct subsys_private *sp) { if (sp) kset_get(&sp->subsys); return sp; } static inline void subsys_put(struct subsys_private *sp) { if (sp) kset_put(&sp->subsys); } struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { struct kobject kobj; struct klist klist_devices; struct klist_node knode_bus; struct module_kobject *mkobj; struct device_driver *driver; }; #define to_driver(obj) container_of(obj, struct driver_private, kobj) /** * struct device_private - structure to hold the private to the driver core portions of the device structure. * * @klist_children - klist containing all children of this device * @knode_parent - node in sibling list * @knode_driver - node in driver list * @knode_bus - node in bus list * @knode_class - node in class list * @deferred_probe - entry in deferred_probe_list which is used to retry the * binding of drivers which were unable to get all the resources needed by * the device; typically because it depends on another driver getting * probed first. * @async_driver - pointer to device driver awaiting probe via async_probe * @device - pointer back to the struct device that this structure is * associated with. * @dead - This device is currently either in the process of or has been * removed from the system. Any asynchronous events scheduled for this * device should exit without taking any action. * * Nothing outside of the driver core should ever touch these fields. 
*/ struct device_private { struct klist klist_children; struct klist_node knode_parent; struct klist_node knode_driver; struct klist_node knode_bus; struct klist_node knode_class; struct list_head deferred_probe; struct device_driver *async_driver; char *deferred_probe_reason; struct device *device; u8 dead:1; }; #define to_device_private_parent(obj) \ container_of(obj, struct device_private, knode_parent) #define to_device_private_driver(obj) \ container_of(obj, struct device_private, knode_driver) #define to_device_private_bus(obj) \ container_of(obj, struct device_private, knode_bus) #define to_device_private_class(obj) \ container_of(obj, struct device_private, knode_class) /* initialisation functions */ int devices_init(void); int buses_init(void); int classes_init(void); int firmware_init(void); #ifdef CONFIG_SYS_HYPERVISOR int hypervisor_init(void); #else static inline int hypervisor_init(void) { return 0; } #endif int platform_bus_init(void); void cpu_dev_init(void); void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS void auxiliary_bus_init(void); #else static inline void auxiliary_bus_init(void) { } #endif struct kobject *virtual_device_parent(struct device *dev); int bus_add_device(struct device *dev); void bus_probe_device(struct device *dev); void bus_remove_device(struct device *dev); void bus_notify(struct device *dev, enum bus_notifier_event value); bool bus_is_registered(const struct bus_type *bus); int bus_add_driver(struct device_driver *drv); void bus_remove_driver(struct device_driver *drv); void device_release_driver_internal(struct device *dev, struct device_driver *drv, struct device *parent); void driver_detach(struct device_driver *drv); void driver_deferred_probe_del(struct device *dev); void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf); static inline int driver_match_device(struct device_driver *drv, struct device *dev) { return drv->bus->match ? 
drv->bus->match(dev, drv) : 1; } static inline void dev_sync_state(struct device *dev) { if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); } int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups); void driver_remove_groups(struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); void deferred_probe_extend_timeout(void); void driver_deferred_probe_trigger(void); const char *device_get_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp); /* /sys/devices directory */ extern struct kset *devices_kset; void devices_kset_move_last(struct device *dev); #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS) void module_add_driver(struct module *mod, struct device_driver *drv); void module_remove_driver(struct device_driver *drv); #else static inline void module_add_driver(struct module *mod, struct device_driver *drv) { } static inline void module_remove_driver(struct device_driver *drv) { } #endif #ifdef CONFIG_DEVTMPFS int devtmpfs_init(void); #else static inline int devtmpfs_init(void) { return 0; } #endif #ifdef CONFIG_BLOCK extern struct class block_class; static inline bool is_blockdev(struct device *dev) { return dev->class == &block_class; } #else static inline bool is_blockdev(struct device *dev) { return false; } #endif /* Device links support */ int device_links_read_lock(void); void device_links_read_unlock(int idx); int device_links_read_lock_held(void); int device_links_check_suppliers(struct device *dev); void device_links_force_bind(struct device *dev); void device_links_driver_bound(struct device *dev); void device_links_driver_cleanup(struct device *dev); void device_links_no_driver(struct device *dev); bool device_links_busy(struct device *dev); void device_links_unbind_consumers(struct device *dev); void fw_devlink_drivers_done(void); void fw_devlink_probing_done(void); /* device pm support */ void device_pm_move_to_tail(struct device *dev); #ifdef CONFIG_DEVTMPFS int devtmpfs_create_node(struct device *dev); int devtmpfs_delete_node(struct device *dev); #else static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } #endif void software_node_notify(struct device *dev); void software_node_notify_remove(struct device *dev); |
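/*
 * Editor's sketch, not part of base.h: how driver_match_device() above is
 * used.  If a bus implements ->match() it decides whether a driver may be
 * bound to a device; a bus without ->match() treats every driver as a match
 * for every device.  The bus, structure and field names below are
 * hypothetical and only illustrate the callback's shape as used in this
 * tree (non-const struct device_driver, non-zero return on a match).
 */
struct foo_device {
	struct device dev;
	const char *match_name;	/* hypothetical field used for matching */
};

#define to_foo_device(d) container_of(d, struct foo_device, dev)

static int foo_bus_match(struct device *dev, struct device_driver *drv)
{
	/* bind when the driver's name equals the device's match_name */
	return strcmp(to_foo_device(dev)->match_name, drv->name) == 0;
}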
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfs/bfind.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Search routines for btrees */ #include <linux/slab.h> #include "btree.h" int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) { void *ptr; fd->tree = tree; fd->bnode = NULL; ptr = kmalloc(tree->max_key_len * 2 + 4, GFP_KERNEL); if (!ptr) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); switch (tree->cnid) { case HFS_CAT_CNID: mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); break; case HFS_EXT_CNID: mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); break; case HFS_ATTR_CNID: mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); break; default: return -EINVAL; } return 0; } void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); hfs_dbg(BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } /* Find the record in bnode that best matches key (not greater than...)*/ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) { int cmpval; u16 off, len, keylen; int rec; int b, e; int res; b = 0; e = bnode->num_recs - 1; res = -ENOENT; do { rec = (e + b) / 2; len = hfs_brec_lenoff(bnode, rec, &off); keylen = hfs_brec_keylen(bnode, rec); if (keylen == 0) { res = -EINVAL; goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); cmpval = bnode->tree->keycmp(fd->key, fd->search_key); if (!cmpval) { e = rec; res = 0; goto done; } if (cmpval < 0) b = rec + 1; else e = rec - 1; } while (b <= e); if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); if (keylen == 0) { res = -EINVAL; goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); } done: fd->record = e; fd->keyoffset = off; fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; fail: return res; } /* Traverse a B*Tree from the root to a leaf finding best fit to key */ /* Return allocated copy of node found, set recnum to best record */ int hfs_brec_find(struct hfs_find_data *fd) { struct hfs_btree *tree; struct hfs_bnode *bnode; u32 nidx, parent; __be32 data; int height, res; tree = fd->tree; if (fd->bnode) hfs_bnode_put(fd->bnode); fd->bnode = NULL; nidx = tree->root; if (!nidx) return -ENOENT; height = tree->depth; res = 0; parent = 0; for (;;) { bnode = hfs_bnode_find(tree, nidx); if (IS_ERR(bnode)) { res
= PTR_ERR(bnode); bnode = NULL; break; } if (bnode->height != height) goto invalid; if (bnode->type != (--height ? HFS_NODE_INDEX : HFS_NODE_LEAF)) goto invalid; bnode->parent = parent; res = __hfs_brec_find(bnode, fd); if (!height) break; if (fd->record < 0) goto release; parent = nidx; hfs_bnode_read(bnode, &data, fd->entryoffset, 4); nidx = be32_to_cpu(data); hfs_bnode_put(bnode); } fd->bnode = bnode; return res; invalid: pr_err("inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", height, bnode->height, bnode->type, nidx, parent); res = -EIO; release: hfs_bnode_put(bnode); return res; } int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len) { int res; res = hfs_brec_find(fd); if (res) return res; if (fd->entrylength > rec_len) return -EINVAL; hfs_bnode_read(fd->bnode, rec, fd->entryoffset, fd->entrylength); return 0; } int hfs_brec_goto(struct hfs_find_data *fd, int cnt) { struct hfs_btree *tree; struct hfs_bnode *bnode; int idx, res = 0; u16 off, len, keylen; bnode = fd->bnode; tree = bnode->tree; if (cnt < 0) { cnt = -cnt; while (cnt > fd->record) { cnt -= fd->record + 1; fd->record = bnode->num_recs - 1; idx = bnode->prev; if (!idx) { res = -ENOENT; goto out; } hfs_bnode_put(bnode); bnode = hfs_bnode_find(tree, idx); if (IS_ERR(bnode)) { res = PTR_ERR(bnode); bnode = NULL; goto out; } } fd->record -= cnt; } else { while (cnt >= bnode->num_recs - fd->record) { cnt -= bnode->num_recs - fd->record; fd->record = 0; idx = bnode->next; if (!idx) { res = -ENOENT; goto out; } hfs_bnode_put(bnode); bnode = hfs_bnode_find(tree, idx); if (IS_ERR(bnode)) { res = PTR_ERR(bnode); bnode = NULL; goto out; } } fd->record += cnt; } len = hfs_brec_lenoff(bnode, fd->record, &off); keylen = hfs_brec_keylen(bnode, fd->record); if (keylen == 0) { res = -EINVAL; goto out; } fd->keyoffset = off; fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; hfs_bnode_read(bnode, fd->key, off, keylen); out: fd->bnode = bnode; return res; } |
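/*
 * Editor's sketch, not part of bfind.c: the "best fit, not greater than"
 * convention used by __hfs_brec_find() above.  Records in a bnode are key
 * ordered, and the search returns the last record whose key is less than
 * or equal to the search key (or -1 and -ENOENT when every key is greater).
 * The same idea on a plain sorted integer array, with made-up names:
 */
static int hfs_like_best_fit(const int *keys, int num_recs, int search_key)
{
	int b = 0, e = num_recs - 1, rec;

	while (b <= e) {
		rec = (e + b) / 2;
		if (keys[rec] == search_key)
			return rec;		/* exact match */
		if (keys[rec] < search_key)
			b = rec + 1;
		else
			e = rec - 1;
	}
	return e;	/* index of last key <= search_key; -1 if none */
}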
// SPDX-License-Identifier: GPL-2.0-or-later /* * imon.c: input and display driver for SoundGraph iMON IR/VFD/LCD * * Copyright(C) 2010 Jarod Wilson <jarod@wilsonet.com> * Portions based on the original lirc_imon driver, * Copyright(C) 2004 Venky Raju(dev@venky.ws) * * Huge thanks to R. Geoff Newbury for invaluable debugging on the * 0xffdc iMON devices, and for sending me one to hack on, without * which the support for them wouldn't be nearly as good. Thanks * also to the numerous 0xffdc device owners that tested auto-config * support for me and provided debug dumps from their devices.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <media/rc-core.h> #include <linux/timer.h> #define MOD_AUTHOR "Jarod Wilson <jarod@wilsonet.com>" #define MOD_DESC "Driver for SoundGraph iMON MultiMedia IR/Display" #define MOD_NAME "imon" #define MOD_VERSION "0.9.4" #define DISPLAY_MINOR_BASE 144 #define DEVICE_NAME "lcd%d" #define BUF_CHUNK_SIZE 8 #define BUF_SIZE 128 #define BIT_DURATION 250 /* each bit received is 250us */ #define IMON_CLOCK_ENABLE_PACKETS 2 /*** P R O T O T Y P E S ***/ /* USB Callback prototypes */ static int imon_probe(struct usb_interface *interface, const struct usb_device_id *id); static void imon_disconnect(struct usb_interface *interface); static void usb_rx_callback_intf0(struct urb *urb); static void usb_rx_callback_intf1(struct urb *urb); static void usb_tx_callback(struct urb *urb); /* suspend/resume support */ static int imon_resume(struct usb_interface *intf); static int imon_suspend(struct usb_interface *intf, pm_message_t message); /* Display file_operations function prototypes */ static int display_open(struct inode *inode, struct file *file); static int display_close(struct inode *inode, struct file *file); /* VFD write operation */ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /* LCD file_operations override function prototypes */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /*** G L O B A L S ***/ struct imon_panel_key_table { u64 hw_code; u32 keycode; }; struct imon_usb_dev_descr { __u16 flags; #define IMON_NO_FLAGS 0 #define IMON_NEED_20MS_PKT_DELAY 1 #define IMON_SUPPRESS_REPEATED_KEYS 2 struct imon_panel_key_table key_table[]; }; struct imon_context { struct device *dev; /* Newer devices have two interfaces */ struct usb_device *usbdev_intf0; struct usb_device *usbdev_intf1; bool display_supported; /* not all controllers do */ bool display_isopen; /* display port has been opened */ bool rf_device; /* true if iMON 2.4G LT/DT RF device */ bool rf_isassociating; /* RF remote associating */ bool dev_present_intf0; /* USB device presence, interface 0 */ bool dev_present_intf1; /* USB device presence, interface 1 */ struct mutex lock; /* to lock this object */ wait_queue_head_t remove_ok; /* For unexpected USB disconnects */ struct usb_endpoint_descriptor *rx_endpoint_intf0; struct usb_endpoint_descriptor *rx_endpoint_intf1; struct usb_endpoint_descriptor *tx_endpoint; struct urb *rx_urb_intf0; struct urb *rx_urb_intf1; struct urb *tx_urb; bool tx_control; unsigned char usb_rx_buf[8]; unsigned char usb_tx_buf[8]; unsigned int send_packet_delay; struct tx_t { unsigned char data_buf[35]; /* user data buffer */ struct completion finished; /* wait for write to finish */ bool busy; /* write in progress */ int status; /* status of tx completion */ } tx; u16 vendor; /* usb vendor ID */ u16 product; /* usb product ID */ struct rc_dev *rdev; /* rc-core device for remote */ struct input_dev *idev; /* input device for panel & IR mouse */ struct input_dev *touch; /* input device for touchscreen */ spinlock_t kc_lock; /* make sure we get keycodes right */ u32 kc; /* current input keycode */ u32 last_keycode; /* last reported input keycode */ u32 rc_scancode; /* the 
computed remote scancode */ u8 rc_toggle; /* the computed remote toggle bit */ u64 rc_proto; /* iMON or MCE (RC6) IR protocol? */ bool release_code; /* some keys send a release code */ u8 display_type; /* store the display type */ bool pad_mouse; /* toggle kbd(0)/mouse(1) mode */ char name_rdev[128]; /* rc input device name */ char phys_rdev[64]; /* rc input device phys path */ char name_idev[128]; /* input device name */ char phys_idev[64]; /* input device phys path */ char name_touch[128]; /* touch screen name */ char phys_touch[64]; /* touch screen phys path */ struct timer_list ttimer; /* touch screen timer */ int touch_x; /* x coordinate on touchscreen */ int touch_y; /* y coordinate on touchscreen */ const struct imon_usb_dev_descr *dev_descr; /* device description with key */ /* table for front panels */ /* * Fields for deferring free_imon_context(). * * Since reference to "struct imon_context" is stored into * "struct file"->private_data, we need to remember * how many file descriptors might access this "struct imon_context". */ refcount_t users; /* * Use a flag for telling display_open()/vfd_write()/lcd_write() that * imon_disconnect() was already called. */ bool disconnected; /* * We need to wait for RCU grace period in order to allow * display_open() to safely check ->disconnected and increment ->users. */ struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, .open = display_open, .write = vfd_write, .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, .open = display_open, .write = lcd_write, .release = display_close, .llseek = noop_llseek, }; enum { IMON_DISPLAY_TYPE_AUTO = 0, IMON_DISPLAY_TYPE_VFD = 1, IMON_DISPLAY_TYPE_LCD = 2, IMON_DISPLAY_TYPE_VGA = 3, IMON_DISPLAY_TYPE_NONE = 4, }; enum { IMON_KEY_IMON = 0, IMON_KEY_MCE = 1, IMON_KEY_PANEL = 2, }; static struct usb_class_driver imon_vfd_class = { .name = DEVICE_NAME, .fops = &vfd_fops, .minor_base = DISPLAY_MINOR_BASE, }; static struct usb_class_driver imon_lcd_class = { .name = DEVICE_NAME, .fops = &lcd_fops, .minor_base = DISPLAY_MINOR_BASE, }; /* imon receiver front panel/knob key table */ static const struct imon_usb_dev_descr imon_default_table = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002000ffeell, KEY_VIDEO }, { 0x000000002100ffeell, KEY_CAMERA }, { 0x000000002700ffeell, KEY_DVD }, { 0x000000002300ffeell, KEY_TV }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 
0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; static const struct imon_usb_dev_descr imon_OEM_VFD = { .flags = IMON_NEED_20MS_PKT_DELAY, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* imon receiver front panel/knob key table for DH102*/ static const struct imon_usb_dev_descr imon_DH102 = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, KEY_MUTE }, { 0x0000000f0000ffeell, KEY_MEDIA }, { 0x000000120000ffeell, KEY_UP }, { 0x000000130000ffeell, KEY_DOWN }, { 0x000000140000ffeell, KEY_LEFT }, { 0x000000150000ffeell, KEY_RIGHT }, { 0x000000160000ffeell, KEY_ENTER }, { 0x000000170000ffeell, KEY_ESC }, { 0x0000002b0000ffeell, KEY_EXIT }, { 0x0000002c0000ffeell, KEY_SELECT }, { 0x0000002d0000ffeell, KEY_MENU }, { 0, KEY_RESERVED } } }; /* imon ultrabay front panel key table */ static const struct imon_usb_dev_descr ultrabay_table = { .flags = IMON_SUPPRESS_REPEATED_KEYS, .key_table = { { 0x0000000f0000ffeell, KEY_MEDIA }, /* Go */ { 0x000000000100ffeell, KEY_UP }, { 0x000000000001ffeell, KEY_DOWN }, { 0x000000160000ffeell, KEY_ENTER }, { 0x0000001f0000ffeell, KEY_AUDIO }, /* Music */ { 0x000000200000ffeell, KEY_VIDEO }, /* Movie */ { 0x000000210000ffeell, KEY_CAMERA }, /* Photo */ { 0x000000270000ffeell, KEY_DVD }, /* DVD */ { 0x000000230000ffeell, KEY_TV }, /* TV */ { 0x000000050000ffeell, KEY_PREVIOUS }, /* Previous */ { 0x000000070000ffeell, KEY_REWIND }, { 0x000000040000ffeell, KEY_STOP }, { 0x000000020000ffeell, KEY_PLAYPAUSE }, { 0x000000080000ffeell, KEY_FASTFORWARD }, { 0x000000060000ffeell, KEY_NEXT }, /* Next */ { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, 
KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* * USB Device ID for iMON USB Control Boards * * The Windows drivers contain 6 different inf files, more or less one for * each new device until the 0x0034-0x0046 devices, which all use the same * driver. Some of the devices in the 34-46 range haven't been definitively * identified yet. Early devices have either a TriGem Computer, Inc. or a * Samsung vendor ID (0x0aa8 and 0x04e8 respectively), while all later * devices use the SoundGraph vendor ID (0x15c2). This driver only supports * the ffdc and later devices, which do onboard decoding. */ static const struct usb_device_id imon_usb_id_table[] = { /* * Several devices with this same device ID, all use iMON_PAD.inf * SoundGraph iMON PAD (IR & VFD) * SoundGraph iMON PAD (IR & LCD) * SoundGraph iMON Knob (IR only) */ { USB_DEVICE(0x15c2, 0xffdc), .driver_info = (unsigned long)&imon_default_table }, /* * Newer devices, all driven by the latest iMON Windows driver, full * list of device IDs extracted via 'strings Setup/data1.hdr |grep 15c2' * Need user input to fill in details on unknown devices. */ /* SoundGraph iMON OEM Touch LCD (IR & 7" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0034), .driver_info = (unsigned long)&imon_DH102 }, /* SoundGraph iMON OEM Touch LCD (IR & 4.3" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0035), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM VFD (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0036), .driver_info = (unsigned long)&imon_OEM_VFD }, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0037), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM LCD (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0038), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON UltraBay (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0039), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003a), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003b), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM Inside (IR only) */ { USB_DEVICE(0x15c2, 0x003c), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003d), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003e), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003f), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0040), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON MINI (IR only) */ { USB_DEVICE(0x15c2, 0x0041), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station EZ External (IR only) */ { USB_DEVICE(0x15c2, 0x0042), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Basic Internal (IR only) */ { USB_DEVICE(0x15c2, 0x0043), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Elite (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0044), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Premiere (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0045), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0046), .driver_info = (unsigned long)&imon_default_table}, {} }; /* USB Device data */ static struct usb_driver imon_driver = { .name = MOD_NAME, .probe = imon_probe, 
.disconnect = imon_disconnect, .suspend = imon_suspend, .resume = imon_resume, .id_table = imon_usb_id_table, }; /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); MODULE_VERSION(MOD_VERSION); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, imon_usb_id_table); static bool debug; module_param(debug, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug, "Debug messages: 0=no, 1=yes (default: no)"); /* lcd, vfd, vga or none? should be auto-detected, but can be overridden... */ static int display_type; module_param(display_type, int, S_IRUGO); MODULE_PARM_DESC(display_type, "Type of attached display. 0=autodetect, 1=vfd, 2=lcd, 3=vga, 4=none (default: autodetect)"); static int pad_stabilize = 1; module_param(pad_stabilize, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_stabilize, "Apply stabilization algorithm to iMON PAD presses in arrow key mode. 0=disable, 1=enable (default)."); /* * In certain use cases, mouse mode isn't really helpful, and could actually * cause confusion, so allow disabling it when the IR device is open. */ static bool nomouse; module_param(nomouse, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nomouse, "Disable mouse input device mode when IR device is open. 0=don't disable, 1=disable. (default: don't disable)"); /* threshold at which a pad push registers as an arrow key in kbd mode */ static int pad_thresh; module_param(pad_thresh, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_thresh, "Threshold at which a pad push registers as an arrow key in kbd mode (default: 28)"); static void free_imon_context(struct imon_context *ictx) { struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } /* * Called when the Display device (e.g. /dev/lcd0) * is opened by the application. */ static int display_open(struct inode *inode, struct file *file) { struct usb_interface *interface; struct imon_context *ictx = NULL; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { pr_err("could not find interface for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_lock(); ictx = usb_get_intfdata(interface); if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_unlock(); mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (ictx->display_isopen) { pr_err("display port is already open\n"); retval = -EBUSY; } else { ictx->display_isopen = true; file->private_data = ictx; dev_dbg(ictx->dev, "display port opened\n"); } mutex_unlock(&ictx->lock); if (retval && refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); exit: return retval; } /* * Called when the display device (e.g. /dev/lcd0) * is closed by the application. 
*/ static int display_close(struct inode *inode, struct file *file) { struct imon_context *ictx = file->private_data; int retval = 0; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (!ictx->display_isopen) { pr_err("display is not open\n"); retval = -EIO; } else { ictx->display_isopen = false; dev_dbg(ictx->dev, "display port closed\n"); } mutex_unlock(&ictx->lock); if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); return retval; } /* * Sends a packet to the device -- this function must be called with * ictx->lock held, or its unlock/lock sequence while waiting for tx * to complete can/will lead to a deadlock. */ static int send_packet(struct imon_context *ictx) { unsigned int pipe; unsigned long timeout; int interval = 0; int retval = 0; struct usb_ctrlrequest *control_req = NULL; /* Check if we need to use control or interrupt urb */ if (!ictx->tx_control) { pipe = usb_sndintpipe(ictx->usbdev_intf0, ictx->tx_endpoint->bEndpointAddress); interval = ictx->tx_endpoint->bInterval; usb_fill_int_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx, interval); ictx->tx_urb->actual_length = 0; } else { /* fill request into kmalloc'ed space: */ control_req = kmalloc(sizeof(*control_req), GFP_KERNEL); if (control_req == NULL) return -ENOMEM; /* setup packet is '21 09 0200 0001 0008' */ control_req->bRequestType = 0x21; control_req->bRequest = 0x09; control_req->wValue = cpu_to_le16(0x0200); control_req->wIndex = cpu_to_le16(0x0001); control_req->wLength = cpu_to_le16(0x0008); /* control pipe is endpoint 0x00 */ pipe = usb_sndctrlpipe(ictx->usbdev_intf0, 0); /* build the control urb */ usb_fill_control_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, (unsigned char *)control_req, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx); ictx->tx_urb->actual_length = 0; } reinit_completion(&ictx->tx.finished); ictx->tx.busy = true; smp_rmb(); /* ensure later readers know we're busy */ retval = usb_submit_urb(ictx->tx_urb, GFP_KERNEL); if (retval) { ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ pr_err_ratelimited("error submitting urb(%d)\n", retval); } else { /* Wait for transmission to complete (or abort) */ retval = wait_for_completion_interruptible( &ictx->tx.finished); if (retval) { usb_kill_urb(ictx->tx_urb); pr_err_ratelimited("task interrupted\n"); } ictx->tx.busy = false; retval = ictx->tx.status; if (retval) pr_err_ratelimited("packet tx failed (%d)\n", retval); } kfree(control_req); /* * Induce a mandatory delay before returning, as otherwise, * send_packet can get called so rapidly as to overwhelm the device, * particularly on faster systems and/or those with quirky usb. */ timeout = msecs_to_jiffies(ictx->send_packet_delay); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(timeout); return retval; } /* * Sends an associate packet to the iMON 2.4G. * * This might not be such a good idea, since it has an id collision with * some versions of the "IR & VFD" combo. The only way to determine if it * is an RF version is to look at the product description string. (Which * we currently do not fetch). 
*/ static int send_associate_24g(struct imon_context *ictx) { const unsigned char packet[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20 }; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } if (!ictx->dev_present_intf0) { pr_err("no iMON device present\n"); return -ENODEV; } memcpy(ictx->usb_tx_buf, packet, sizeof(packet)); return send_packet(ictx); } /* * Sends packets to setup and show clock on iMON display * * Arguments: year - last 2 digits of year, month - 1..12, * day - 1..31, dow - day of the week (0-Sun...6-Sat), * hour - 0..23, minute - 0..59, second - 0..59 */ static int send_set_imon_clock(struct imon_context *ictx, unsigned int year, unsigned int month, unsigned int day, unsigned int dow, unsigned int hour, unsigned int minute, unsigned int second) { unsigned char clock_enable_pkt[IMON_CLOCK_ENABLE_PACKETS][8]; int retval = 0; int i; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } switch (ictx->display_type) { case IMON_DISPLAY_TYPE_LCD: clock_enable_pkt[0][0] = 0x80; clock_enable_pkt[0][1] = year; clock_enable_pkt[0][2] = month-1; clock_enable_pkt[0][3] = day; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[1][0] = 0x80; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 0; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; if (ictx->product == 0xffdc) { clock_enable_pkt[0][7] = 0x50; clock_enable_pkt[1][7] = 0x51; } else { clock_enable_pkt[0][7] = 0x88; clock_enable_pkt[1][7] = 0x8a; } break; case IMON_DISPLAY_TYPE_VFD: clock_enable_pkt[0][0] = year; clock_enable_pkt[0][1] = month-1; clock_enable_pkt[0][2] = day; clock_enable_pkt[0][3] = dow; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[0][7] = 0x40; clock_enable_pkt[1][0] = 0; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 1; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; clock_enable_pkt[1][7] = 0x42; break; default: return -ENODEV; } for (i = 0; i < IMON_CLOCK_ENABLE_PACKETS; i++) { memcpy(ictx->usb_tx_buf, clock_enable_pkt[i], 8); retval = send_packet(ictx); if (retval) { pr_err("send_packet failed for packet %d\n", i); break; } } return retval; } /* * These are the sysfs functions to handle the association on the iMON 2.4G LT. 
*/ static ssize_t associate_remote_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (ictx->rf_isassociating) strscpy(buf, "associating\n", PAGE_SIZE); else strscpy(buf, "closed\n", PAGE_SIZE); dev_info(d, "Visit https://www.lirc.org/html/imon-24g.html for instructions on how to associate your iMON 2.4G DT/LT remote\n"); mutex_unlock(&ictx->lock); return strlen(buf); } static ssize_t associate_remote_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx; ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); ictx->rf_isassociating = true; send_associate_24g(ictx); mutex_unlock(&ictx->lock); return count; } /* * sysfs functions to control internal imon clock */ static ssize_t imon_clock_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); size_t len; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { len = snprintf(buf, PAGE_SIZE, "Not supported."); } else { len = snprintf(buf, PAGE_SIZE, "To set the clock on your iMON display:\n" "# date \"+%%y %%m %%d %%w %%H %%M %%S\" > imon_clock\n" "%s", ictx->display_isopen ? "\nNOTE: imon device must be closed\n" : ""); } mutex_unlock(&ictx->lock); return len; } static ssize_t imon_clock_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx = dev_get_drvdata(d); ssize_t retval; unsigned int year, month, day, dow, hour, minute, second; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { retval = -ENODEV; goto exit; } else if (ictx->display_isopen) { retval = -EBUSY; goto exit; } if (sscanf(buf, "%u %u %u %u %u %u %u", &year, &month, &day, &dow, &hour, &minute, &second) != 7) { retval = -EINVAL; goto exit; } if ((month < 1 || month > 12) || (day < 1 || day > 31) || (dow > 6) || (hour > 23) || (minute > 59) || (second > 59)) { retval = -EINVAL; goto exit; } retval = send_set_imon_clock(ictx, year, month, day, dow, hour, minute, second); if (retval) goto exit; retval = count; exit: mutex_unlock(&ictx->lock); return retval; } static DEVICE_ATTR_RW(imon_clock); static DEVICE_ATTR_RW(associate_remote); static struct attribute *imon_display_sysfs_entries[] = { &dev_attr_imon_clock.attr, NULL }; static const struct attribute_group imon_display_attr_group = { .attrs = imon_display_sysfs_entries }; static struct attribute *imon_rf_sysfs_entries[] = { &dev_attr_associate_remote.attr, NULL }; static const struct attribute_group imon_rf_attr_group = { .attrs = imon_rf_sysfs_entries }; /* * Writes data to the VFD. The iMON VFD is 2x16 characters * and requires data in 5 consecutive USB interrupt packets, * each packet but the last carrying 7 bytes. * * I don't know if the VFD board supports features such as * scrolling, clearing rows, blanking, etc. so at * the caller must provide a full screen of data. If fewer * than 32 bytes are provided spaces will be appended to * generate a full screen. 
*/ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int i; int offset; int seq; int retval = 0; struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; if (ictx->disconnected) return -ENODEV; if (mutex_lock_interruptible(&ictx->lock)) return -ERESTARTSYS; if (!ictx->dev_present_intf0) { pr_err_ratelimited("no iMON device present\n"); retval = -ENODEV; goto exit; } if (n_bytes <= 0 || n_bytes > 32) { pr_err_ratelimited("invalid payload size\n"); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->tx.data_buf, buf, n_bytes)) { retval = -EFAULT; goto exit; } /* Pad with spaces */ for (i = n_bytes; i < 32; ++i) ictx->tx.data_buf[i] = ' '; for (i = 32; i < 35; ++i) ictx->tx.data_buf[i] = 0xFF; offset = 0; seq = 0; do { memcpy(ictx->usb_tx_buf, ictx->tx.data_buf + offset, 7); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet #%d failed\n", seq / 2); goto exit; } else { seq += 2; offset += 7; } } while (offset < 35); /* Send packet #6 */ memcpy(ictx->usb_tx_buf, &vfd_packet6, sizeof(vfd_packet6)); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) pr_err_ratelimited("send packet #%d failed\n", seq / 2); exit: mutex_unlock(&ictx->lock); return (!retval) ? n_bytes : retval; } /* * Writes data to the LCD. The iMON OEM LCD screen expects 8-byte * packets. We accept data as 16 hexadecimal digits, followed by a * newline (to make it easy to drive the device from a command-line * -- even though the actual binary data is a bit complicated). * * The device itself is not a "traditional" text-mode display. It's * actually a 16x96 pixel bitmap display. That means if you want to * display text, you've got to have your own "font" and translate the * text into bitmaps for display. This is really flexible (you can * display whatever diacritics you need, and so on), but it's also * a lot more complicated than most LCDs... */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; struct imon_context *ictx = file->private_data; if (ictx->disconnected) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err_ratelimited("no iMON display present\n"); retval = -ENODEV; goto exit; } if (n_bytes != 8) { pr_err_ratelimited("invalid payload size: %d (expected 8)\n", (int)n_bytes); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->usb_tx_buf, buf, 8)) { retval = -EFAULT; goto exit; } retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet failed!\n"); goto exit; } else { dev_dbg(ictx->dev, "%s: write %d bytes to LCD\n", __func__, (int) n_bytes); } exit: mutex_unlock(&ictx->lock); return (!retval) ? 
n_bytes : retval; } /* * Callback function for USB core API: transmit data */ static void usb_tx_callback(struct urb *urb) { struct imon_context *ictx; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; ictx->tx.status = urb->status; /* notify waiters that write has finished */ ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ complete(&ictx->tx.finished); } /* * report touchscreen input */ static void imon_touch_display_timeout(struct timer_list *t) { struct imon_context *ictx = from_timer(ictx, t, ttimer); if (ictx->display_type != IMON_DISPLAY_TYPE_VGA) return; input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x00); input_sync(ictx->touch); } /* * iMON IR receivers support two different signal sets -- those used by * the iMON remotes, and those used by the Windows MCE remotes (which is * really just RC-6), but only one or the other at a time, as the signals * are decoded onboard the receiver. * * This function gets called two different ways, one way is from * rc_register_device, for initial protocol selection/setup, and the other is * via a userspace-initiated protocol change request, either by direct sysfs * prodding or by something like ir-keytable. In the rc_register_device case, * the imon context lock is already held, but when initiated from userspace, * it is not, so we must acquire it prior to calling send_packet, which * requires that the lock is held. */ static int imon_ir_change_protocol(struct rc_dev *rc, u64 *rc_proto) { int retval; struct imon_context *ictx = rc->priv; struct device *dev = ictx->dev; bool unlock = false; unsigned char ir_proto_packet[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 }; if (*rc_proto && !(*rc_proto & rc->allowed_protocols)) dev_warn(dev, "Looks like you're trying to use an IR protocol this device does not support\n"); if (*rc_proto & RC_PROTO_BIT_RC6_MCE) { dev_dbg(dev, "Configuring IR receiver for MCE protocol\n"); ir_proto_packet[0] = 0x01; *rc_proto = RC_PROTO_BIT_RC6_MCE; } else if (*rc_proto & RC_PROTO_BIT_IMON) { dev_dbg(dev, "Configuring IR receiver for iMON protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } else { dev_warn(dev, "Unsupported IR protocol specified, overriding to iMON IR protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet)); if (!mutex_is_locked(&ictx->lock)) { unlock = true; mutex_lock(&ictx->lock); } retval = send_packet(ictx); if (retval) goto out; ictx->rc_proto = *rc_proto; ictx->pad_mouse = false; out: if (unlock) mutex_unlock(&ictx->lock); return retval; } /* * The directional pad behaves a bit differently, depending on whether this is * one of the older ffdc devices or a newer device. Newer devices appear to * have a higher resolution matrix for more precise mouse movement, but it * makes things overly sensitive in keyboard mode, so we do some interesting * contortions to make it less touchy. Older devices run through the same * routine with shorter timeout and a smaller threshold. 
*/ static int stabilize(int a, int b, u16 timeout, u16 threshold) { ktime_t ct; static ktime_t prev_time; static ktime_t hit_time; static int x, y, prev_result, hits; int result = 0; long msec, msec_hit; ct = ktime_get(); msec = ktime_ms_delta(ct, prev_time); msec_hit = ktime_ms_delta(ct, hit_time); if (msec > 100) { x = 0; y = 0; hits = 0; } x += a; y += b; prev_time = ct; if (abs(x) > threshold || abs(y) > threshold) { if (abs(y) > abs(x)) result = (y > 0) ? 0x7F : 0x80; else result = (x > 0) ? 0x7F00 : 0x8000; x = 0; y = 0; if (result == prev_result) { hits++; if (hits > 3) { switch (result) { case 0x7F: y = 17 * threshold / 30; break; case 0x80: y -= 17 * threshold / 30; break; case 0x7F00: x = 17 * threshold / 30; break; case 0x8000: x -= 17 * threshold / 30; break; } } if (hits == 2 && msec_hit < timeout) { result = 0; hits = 1; } } else { prev_result = result; hits = 1; hit_time = ct; } } return result; } static u32 imon_remote_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; u32 release; bool is_release_code = false; /* Look for the initial press of a button */ keycode = rc_g_keycode_from_table(ictx->rdev, scancode); ictx->rc_toggle = 0x0; ictx->rc_scancode = scancode; /* Look for the release of a button */ if (keycode == KEY_RESERVED) { release = scancode & ~0x4000; keycode = rc_g_keycode_from_table(ictx->rdev, release); if (keycode != KEY_RESERVED) is_release_code = true; } ictx->release_code = is_release_code; return keycode; } static u32 imon_mce_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; #define MCE_KEY_MASK 0x7000 #define MCE_TOGGLE_BIT 0x8000 /* * On some receivers, mce keys decode to 0x8000f04xx and 0x8000f84xx * (the toggle bit flipping between alternating key presses), while * on other receivers, we see 0x8000f74xx and 0x8000ff4xx. To keep * the table trim, we always or in the bits to look up 0x8000ff4xx, * but we can't or them into all codes, as some keys are decoded in * a different way w/o the same use of the toggle bit... 
*/ if (scancode & 0x80000000) scancode = scancode | MCE_KEY_MASK | MCE_TOGGLE_BIT; ictx->rc_scancode = scancode; keycode = rc_g_keycode_from_table(ictx->rdev, scancode); /* not used in mce mode, but make sure we know its false */ ictx->release_code = false; return keycode; } static u32 imon_panel_key_lookup(struct imon_context *ictx, u64 code) { const struct imon_panel_key_table *key_table; u32 keycode = KEY_RESERVED; int i; key_table = ictx->dev_descr->key_table; for (i = 0; key_table[i].hw_code != 0; i++) { if (key_table[i].hw_code == (code | 0xffee)) { keycode = key_table[i].keycode; break; } } ictx->release_code = false; return keycode; } static bool imon_mouse_event(struct imon_context *ictx, unsigned char *buf, int len) { signed char rel_x = 0x00, rel_y = 0x00; u8 right_shift = 1; bool mouse_input = true; int dir = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* newer iMON device PAD or mouse button */ if (ictx->product != 0xffdc && (buf[0] & 0x01) && len == 5) { rel_x = buf[2]; rel_y = buf[3]; right_shift = 1; /* 0xffdc iMON PAD or mouse button input */ } else if (ictx->product == 0xffdc && (buf[0] & 0x40) && !((buf[1] & 0x01) || ((buf[1] >> 2) & 0x01))) { rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x0f; rel_x = rel_x + rel_x / 2; rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x0f; rel_y = rel_y + rel_y / 2; right_shift = 2; /* some ffdc devices decode mouse buttons differently... */ } else if (ictx->product == 0xffdc && (buf[0] == 0x68)) { right_shift = 2; /* ch+/- buttons, which we use for an emulated scroll wheel */ } else if (ictx->kc == KEY_CHANNELUP && (buf[2] & 0x40) != 0x40) { dir = 1; } else if (ictx->kc == KEY_CHANNELDOWN && (buf[2] & 0x40) != 0x40) { dir = -1; } else mouse_input = false; spin_unlock_irqrestore(&ictx->kc_lock, flags); if (mouse_input) { dev_dbg(ictx->dev, "sending mouse data via input subsystem\n"); if (dir) { input_report_rel(ictx->idev, REL_WHEEL, dir); } else if (rel_x || rel_y) { input_report_rel(ictx->idev, REL_X, rel_x); input_report_rel(ictx->idev, REL_Y, rel_y); } else { input_report_key(ictx->idev, BTN_LEFT, buf[1] & 0x1); input_report_key(ictx->idev, BTN_RIGHT, buf[1] >> right_shift & 0x1); } input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return mouse_input; } static void imon_touch_event(struct imon_context *ictx, unsigned char *buf) { mod_timer(&ictx->ttimer, jiffies + TOUCH_TIMEOUT); ictx->touch_x = (buf[0] << 4) | (buf[1] >> 4); ictx->touch_y = 0xfff - ((buf[2] << 4) | (buf[1] & 0xf)); input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x01); input_sync(ictx->touch); } static void imon_pad_to_keys(struct imon_context *ictx, unsigned char *buf) { int dir = 0; signed char rel_x = 0x00, rel_y = 0x00; u16 timeout, threshold; u32 scancode = KEY_RESERVED; unsigned long flags; /* * The imon directional pad functions more like a touchpad. Bytes 3 & 4 * contain a position coordinate (x,y), with each component ranging * from -14 to 14. We want to down-sample this to only 4 discrete values * for up/down/left/right arrow keys. Also, when you get too close to * diagonals, it has a tendency to jump back and forth, so lets try to * ignore when they get too close. 
*/ if (ictx->product != 0xffdc) { /* first, pad to 8 bytes so it conforms with everything else */ buf[5] = buf[6] = buf[7] = 0; timeout = 500; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 28; rel_x = buf[2]; rel_y = buf[3]; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { if ((buf[1] == 0) && ((rel_x != 0) || (rel_y != 0))) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } /* * Handle on-board decoded pad events for e.g. older VFD/iMON-Pad * device (15c2:ffdc). The remote generates various codes from * 0x68nnnnB7 to 0x6AnnnnB7, the left mouse button generates * 0x688301b7 and the right one 0x688481b7. All other keys generate * 0x2nnnnnnn. Position coordinate is encoded in buf[1] and buf[2] with * reversed endianness. Extract direction from buffer, rotate endianness, * adjust sign and feed the values into stabilize(). The resulting codes * will be 0x01008000, 0x01007F00, which match the newer devices. */ } else { timeout = 10; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 15; /* buf[1] is x */ rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x10+1; /* buf[2] is y */ rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x10+1; buf[0] = 0x01; buf[1] = buf[4] = buf[5] = buf[6] = buf[7] = 0; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } } if (scancode) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = imon_remote_key_lookup(ictx, scancode); spin_unlock_irqrestore(&ictx->kc_lock, flags); } } /* * figure out if these is a press or a release. We don't actually * care about repeats, as those will be auto-generated within the IR * subsystem for repeating scancodes. 
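 * (Editorial note for clarity: in iMON mode a key-up scancode is the
 * key-down scancode with bit 0x4000 set, which is why
 * imon_remote_key_lookup() retries the table lookup with that bit cleared
 * and flags release_code. MCE mode has no key-up events at all, so
 * ktype == IMON_KEY_MCE is always reported as a press below.)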
*/ static int imon_parse_press_type(struct imon_context *ictx, unsigned char *buf, u8 ktype) { int press_type = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* key release of 0x02XXXXXX key */ if (ictx->kc == KEY_RESERVED && buf[0] == 0x02 && buf[3] == 0x00) ictx->kc = ictx->last_keycode; /* mouse button release on (some) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x68 && buf[1] == 0x82 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mouse button release on (some other) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x01 && buf[1] == 0x00 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mce-specific button handling, no keyup events */ else if (ktype == IMON_KEY_MCE) { ictx->rc_toggle = buf[2]; press_type = 1; /* incoherent or irrelevant data */ } else if (ictx->kc == KEY_RESERVED) press_type = -EINVAL; /* key release of 0xXXXXXXb7 key */ else if (ictx->release_code) press_type = 0; /* this is a button press */ else press_type = 1; spin_unlock_irqrestore(&ictx->kc_lock, flags); return press_type; } /* * Process the incoming packet */ static void imon_incoming_packet(struct imon_context *ictx, struct urb *urb, int intf) { int len = urb->actual_length; unsigned char *buf = urb->transfer_buffer; struct device *dev = ictx->dev; unsigned long flags; u32 kc; u64 scancode; int press_type = 0; ktime_t t; static ktime_t prev_time; u8 ktype; /* filter out junk data on the older 0xffdc imon devices */ if ((buf[0] == 0xff) && (buf[1] == 0xff) && (buf[2] == 0xff)) return; /* Figure out what key was pressed */ if (len == 8 && buf[7] == 0xee) { scancode = be64_to_cpu(*((__be64 *)buf)); ktype = IMON_KEY_PANEL; kc = imon_panel_key_lookup(ictx, scancode); ictx->release_code = false; } else { scancode = be32_to_cpu(*((__be32 *)buf)); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) { ktype = IMON_KEY_IMON; if (buf[0] == 0x80) ktype = IMON_KEY_MCE; kc = imon_mce_key_lookup(ictx, scancode); } else { ktype = IMON_KEY_IMON; kc = imon_remote_key_lookup(ictx, scancode); } } spin_lock_irqsave(&ictx->kc_lock, flags); /* keyboard/mouse mode toggle button */ if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? 
"mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } else { ictx->pad_mouse = false; dev_dbg(dev, "mouse mode disabled, passing key value\n"); } } ictx->kc = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); /* send touchscreen events through input subsystem if touchpad data */ if (ictx->touch && len == 8 && buf[7] == 0x86) { imon_touch_event(ictx, buf); return; /* look for mouse events with pad in mouse mode */ } else if (ictx->pad_mouse) { if (imon_mouse_event(ictx, buf, len)) return; } /* Now for some special handling to convert pad input to arrow keys */ if (((len == 5) && (buf[0] == 0x01) && (buf[4] == 0x00)) || ((len == 8) && (buf[0] & 0x40) && !(buf[1] & 0x1 || buf[1] >> 2 & 0x1))) { len = 8; imon_pad_to_keys(ictx, buf); } if (debug) { printk(KERN_INFO "intf%d decoded packet: %*ph\n", intf, len, buf); } press_type = imon_parse_press_type(ictx, buf, ktype); if (press_type < 0) goto not_input_data; if (ktype != IMON_KEY_PANEL) { if (press_type == 0) rc_keyup(ictx->rdev); else { enum rc_proto proto; if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) proto = RC_PROTO_RC6_MCE; else if (ictx->rc_proto == RC_PROTO_BIT_IMON) proto = RC_PROTO_IMON; else return; rc_keydown(ictx->rdev, proto, ictx->rc_scancode, ictx->rc_toggle); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return; } /* Only panel type events left to process now */ spin_lock_irqsave(&ictx->kc_lock, flags); t = ktime_get(); /* KEY repeats from knob and panel that need to be suppressed */ if (ictx->kc == KEY_MUTE || ictx->dev_descr->flags & IMON_SUPPRESS_REPEATED_KEYS) { if (ictx->kc == ictx->last_keycode && ktime_ms_delta(t, prev_time) < ictx->idev->rep[REP_DELAY]) { spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } } prev_time = t; kc = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); input_report_key(ictx->idev, kc, press_type); input_sync(ictx->idev); /* panel keys don't generate a release */ input_report_key(ictx->idev, kc, 0); input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; not_input_data: if (len != 8) { dev_warn(dev, "imon %s: invalid incoming packet size (len = %d, intf%d)\n", __func__, len, intf); return; } /* iMON 2.4G associate frame */ if (buf[0] == 0x00 && buf[2] == 0xFF && /* REFID */ buf[3] == 0xFF && buf[4] == 0xFF && buf[5] == 0xFF && /* iMON 2.4G */ ((buf[6] == 0x4E && buf[7] == 0xDF) || /* LT */ (buf[6] == 0x5E && buf[7] == 0xDF))) { /* DT */ dev_warn(dev, "%s: remote associated refid=%02X\n", __func__, buf[1]); ictx->rf_isassociating = false; } } /* * Callback function for USB core API: receive data */ static void usb_rx_callback_intf0(struct urb *urb) { struct imon_context *ictx; int intfnum = 0; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf0) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! 
*/ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf0, GFP_ATOMIC); } static void usb_rx_callback_intf1(struct urb *urb) { struct imon_context *ictx; int intfnum = 1; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf1) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! */ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf1, GFP_ATOMIC); } /* * The 0x15c2:0xffdc device ID was used for umpteen different imon * devices, and all of them constantly spew interrupts, even when there * is no actual data to report. However, byte 6 of this buffer looks like * its unique across device variants, so we're trying to key off that to * figure out which display type (if any) and what IR protocol the device * actually supports. These devices have their IR protocol hard-coded into * their firmware, they can't be changed on the fly like the newer hardware. */ static void imon_get_ffdc_type(struct imon_context *ictx) { u8 ffdc_cfg_byte = ictx->usb_rx_buf[6]; u8 detected_display_type = IMON_DISPLAY_TYPE_NONE; u64 allowed_protos = RC_PROTO_BIT_IMON; switch (ffdc_cfg_byte) { /* iMON Knob, no display, iMON IR + vol knob */ case 0x21: dev_info(ictx->dev, "0xffdc iMON Knob, iMON IR"); ictx->display_supported = false; break; /* iMON 2.4G LT (usb stick), no display, iMON RF */ case 0x4e: dev_info(ictx->dev, "0xffdc iMON 2.4G LT, iMON RF"); ictx->display_supported = false; ictx->rf_device = true; break; /* iMON VFD, no IR (does have vol knob tho) */ case 0x35: dev_info(ictx->dev, "0xffdc iMON VFD + knob, no IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, iMON IR */ case 0x24: case 0x30: case 0x85: dev_info(ictx->dev, "0xffdc iMON VFD, iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, MCE IR */ case 0x46: case 0x9e: dev_info(ictx->dev, "0xffdc iMON VFD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* iMON VFD, iMON or MCE IR */ case 0x7e: dev_info(ictx->dev, "0xffdc iMON VFD, iMON or MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; /* iMON LCD, MCE IR */ case 0x9f: dev_info(ictx->dev, "0xffdc iMON LCD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* no display, iMON IR */ case 0x26: dev_info(ictx->dev, "0xffdc iMON Inside, iMON IR"); ictx->display_supported = false; break; /* Soundgraph iMON UltraBay */ case 0x98: dev_info(ictx->dev, "0xffdc iMON UltraBay, LCD + IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; ictx->dev_descr = &ultrabay_table; break; default: dev_info(ictx->dev, "Unknown 0xffdc device, defaulting to VFD and iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; /* * We don't know which one it is, allow user to set 
the * RC6 one from userspace if IMON wasn't correct. */ allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; } printk(KERN_CONT " (id 0x%02x)\n", ffdc_cfg_byte); ictx->display_type = detected_display_type; ictx->rc_proto = allowed_protos; } static void imon_set_display_type(struct imon_context *ictx) { u8 configured_display_type = IMON_DISPLAY_TYPE_VFD; /* * Try to auto-detect the type of display if the user hasn't set * it by hand via the display_type modparam. Default is VFD. */ if (display_type == IMON_DISPLAY_TYPE_AUTO) { switch (ictx->product) { case 0xffdc: /* set in imon_get_ffdc_type() */ configured_display_type = ictx->display_type; break; case 0x0034: case 0x0035: configured_display_type = IMON_DISPLAY_TYPE_VGA; break; case 0x0038: case 0x0039: case 0x0045: configured_display_type = IMON_DISPLAY_TYPE_LCD; break; case 0x003c: case 0x0041: case 0x0042: case 0x0043: configured_display_type = IMON_DISPLAY_TYPE_NONE; ictx->display_supported = false; break; case 0x0036: case 0x0044: default: configured_display_type = IMON_DISPLAY_TYPE_VFD; break; } } else { configured_display_type = display_type; if (display_type == IMON_DISPLAY_TYPE_NONE) ictx->display_supported = false; else ictx->display_supported = true; dev_info(ictx->dev, "%s: overriding display type to %d via modparam\n", __func__, display_type); } ictx->display_type = configured_display_type; } static struct rc_dev *imon_init_rdev(struct imon_context *ictx) { struct rc_dev *rdev; int ret; static const unsigned char fp_packet[] = { 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88 }; rdev = rc_allocate_device(RC_DRIVER_SCANCODE); if (!rdev) { dev_err(ictx->dev, "remote control dev allocation failed\n"); goto out; } snprintf(ictx->name_rdev, sizeof(ictx->name_rdev), "iMON Remote (%04x:%04x)", ictx->vendor, ictx->product); usb_make_path(ictx->usbdev_intf0, ictx->phys_rdev, sizeof(ictx->phys_rdev)); strlcat(ictx->phys_rdev, "/input0", sizeof(ictx->phys_rdev)); rdev->device_name = ictx->name_rdev; rdev->input_phys = ictx->phys_rdev; usb_to_input_id(ictx->usbdev_intf0, &rdev->input_id); rdev->dev.parent = ictx->dev; rdev->priv = ictx; /* iMON PAD or MCE */ rdev->allowed_protocols = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; rdev->change_protocol = imon_ir_change_protocol; rdev->driver_name = MOD_NAME; /* Enable front-panel buttons and/or knobs */ memcpy(ictx->usb_tx_buf, &fp_packet, sizeof(fp_packet)); ret = send_packet(ictx); /* Not fatal, but warn about it */ if (ret) dev_info(ictx->dev, "panel buttons/knobs setup failed\n"); if (ictx->product == 0xffdc) { imon_get_ffdc_type(ictx); rdev->allowed_protocols = ictx->rc_proto; } imon_set_display_type(ictx); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) rdev->map_name = RC_MAP_IMON_MCE; else rdev->map_name = RC_MAP_IMON_PAD; ret = rc_register_device(rdev); if (ret < 0) { dev_err(ictx->dev, "remote input dev register failed\n"); goto out; } return rdev; out: rc_free_device(rdev); return NULL; } static struct input_dev *imon_init_idev(struct imon_context *ictx) { const struct imon_panel_key_table *key_table; struct input_dev *idev; int ret, i; key_table = ictx->dev_descr->key_table; idev = input_allocate_device(); if (!idev) goto out; snprintf(ictx->name_idev, sizeof(ictx->name_idev), "iMON Panel, Knob and Mouse(%04x:%04x)", ictx->vendor, ictx->product); idev->name = ictx->name_idev; usb_make_path(ictx->usbdev_intf0, ictx->phys_idev, sizeof(ictx->phys_idev)); strlcat(ictx->phys_idev, "/input1", sizeof(ictx->phys_idev)); idev->phys = ictx->phys_idev; idev->evbit[0] = BIT_MASK(EV_KEY) | 
BIT_MASK(EV_REP) | BIT_MASK(EV_REL); idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_RIGHT); idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y) | BIT_MASK(REL_WHEEL); /* panel and/or knob code support */ for (i = 0; key_table[i].hw_code != 0; i++) { u32 kc = key_table[i].keycode; __set_bit(kc, idev->keybit); } usb_to_input_id(ictx->usbdev_intf0, &idev->id); idev->dev.parent = ictx->dev; input_set_drvdata(idev, ictx); ret = input_register_device(idev); if (ret < 0) { dev_err(ictx->dev, "input dev register failed\n"); goto out; } return idev; out: input_free_device(idev); return NULL; } static struct input_dev *imon_init_touch(struct imon_context *ictx) { struct input_dev *touch; int ret; touch = input_allocate_device(); if (!touch) goto touch_alloc_failed; snprintf(ictx->name_touch, sizeof(ictx->name_touch), "iMON USB Touchscreen (%04x:%04x)", ictx->vendor, ictx->product); touch->name = ictx->name_touch; usb_make_path(ictx->usbdev_intf1, ictx->phys_touch, sizeof(ictx->phys_touch)); strlcat(ictx->phys_touch, "/input2", sizeof(ictx->phys_touch)); touch->phys = ictx->phys_touch; touch->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); touch->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(touch, ABS_X, 0x00, 0xfff, 0, 0); input_set_abs_params(touch, ABS_Y, 0x00, 0xfff, 0, 0); input_set_drvdata(touch, ictx); usb_to_input_id(ictx->usbdev_intf1, &touch->id); touch->dev.parent = ictx->dev; ret = input_register_device(touch); if (ret < 0) { dev_info(ictx->dev, "touchscreen input dev register failed\n"); goto touch_register_failed; } return touch; touch_register_failed: input_free_device(touch); touch_alloc_failed: return NULL; } static bool imon_find_endpoints(struct imon_context *ictx, struct usb_host_interface *iface_desc) { struct usb_endpoint_descriptor *ep; struct usb_endpoint_descriptor *rx_endpoint = NULL; struct usb_endpoint_descriptor *tx_endpoint = NULL; int ifnum = iface_desc->desc.bInterfaceNumber; int num_endpts = iface_desc->desc.bNumEndpoints; int i, ep_dir, ep_type; bool ir_ep_found = false; bool display_ep_found = false; bool tx_control = false; /* * Scan the endpoint list and set: * first input endpoint = IR endpoint * first output endpoint = display endpoint */ for (i = 0; i < num_endpts && !(ir_ep_found && display_ep_found); ++i) { ep = &iface_desc->endpoint[i].desc; ep_dir = ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK; ep_type = usb_endpoint_type(ep); if (!ir_ep_found && ep_dir == USB_DIR_IN && ep_type == USB_ENDPOINT_XFER_INT) { rx_endpoint = ep; ir_ep_found = true; dev_dbg(ictx->dev, "%s: found IR endpoint\n", __func__); } else if (!display_ep_found && ep_dir == USB_DIR_OUT && ep_type == USB_ENDPOINT_XFER_INT) { tx_endpoint = ep; display_ep_found = true; dev_dbg(ictx->dev, "%s: found display endpoint\n", __func__); } } if (ifnum == 0) { ictx->rx_endpoint_intf0 = rx_endpoint; /* * tx is used to send characters to lcd/vfd, associate RF * remotes, set IR protocol, and maybe more... */ ictx->tx_endpoint = tx_endpoint; } else { ictx->rx_endpoint_intf1 = rx_endpoint; } /* * If we didn't find a display endpoint, this is probably one of the * newer iMON devices that use control urb instead of interrupt */ if (!display_ep_found) { tx_control = true; display_ep_found = true; dev_dbg(ictx->dev, "%s: device uses control endpoint, not interface OUT endpoint\n", __func__); } /* * Some iMON receivers have no display. Unfortunately, it seems * that SoundGraph recycles device IDs between devices both with * and without... 
:\ */ if (ictx->display_type == IMON_DISPLAY_TYPE_NONE) { display_ep_found = false; dev_dbg(ictx->dev, "%s: device has no display\n", __func__); } /* * iMON Touch devices have a VGA touchscreen, but no "display", as * that refers to e.g. /dev/lcd0 (a character device LCD or VFD). */ if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { display_ep_found = false; dev_dbg(ictx->dev, "%s: iMON Touch device found\n", __func__); } /* Input endpoint is mandatory */ if (!ir_ep_found) pr_err("no valid input (IR) endpoint found\n"); ictx->tx_control = tx_control; if (display_ep_found) ictx->display_supported = true; return ir_ep_found; } static struct imon_context *imon_init_intf0(struct usb_interface *intf, const struct usb_device_id *id) { struct imon_context *ictx; struct urb *rx_urb; struct urb *tx_urb; struct device *dev = &intf->dev; struct usb_host_interface *iface_desc; int ret = -ENOMEM; ictx = kzalloc(sizeof(*ictx), GFP_KERNEL); if (!ictx) goto exit; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!tx_urb) goto tx_urb_alloc_failed; mutex_init(&ictx->lock); spin_lock_init(&ictx->kc_lock); mutex_lock(&ictx->lock); ictx->dev = dev; ictx->usbdev_intf0 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf0 = rx_urb; ictx->tx_urb = tx_urb; ictx->rf_device = false; init_completion(&ictx->tx.finished); ictx->vendor = le16_to_cpu(ictx->usbdev_intf0->descriptor.idVendor); ictx->product = le16_to_cpu(ictx->usbdev_intf0->descriptor.idProduct); /* save drive info for later accessing the panel/knob key table */ ictx->dev_descr = (struct imon_usb_dev_descr *)id->driver_info; /* default send_packet delay is 5ms but some devices need more */ ictx->send_packet_delay = ictx->dev_descr->flags & IMON_NEED_20MS_PKT_DELAY ? 
20 : 5; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) { goto find_endpoint_failed; } usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf0, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf0 (%d)\n", ret); goto urb_submit_failed; } ictx->idev = imon_init_idev(ictx); if (!ictx->idev) { dev_err(dev, "%s: input device setup failed\n", __func__); goto idev_setup_failed; } ictx->rdev = imon_init_rdev(ictx); if (!ictx->rdev) { dev_err(dev, "%s: rc device setup failed\n", __func__); goto rdev_setup_failed; } ictx->dev_present_intf0 = true; mutex_unlock(&ictx->lock); return ictx; rdev_setup_failed: input_unregister_device(ictx->idev); idev_setup_failed: usb_kill_urb(ictx->rx_urb_intf0); urb_submit_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf0); mutex_unlock(&ictx->lock); usb_free_urb(tx_urb); tx_urb_alloc_failed: usb_free_urb(rx_urb); rx_urb_alloc_failed: kfree(ictx); exit: dev_err(dev, "unable to initialize intf0, err %d\n", ret); return NULL; } static struct imon_context *imon_init_intf1(struct usb_interface *intf, struct imon_context *ictx) { struct urb *rx_urb; struct usb_host_interface *iface_desc; int ret = -ENOMEM; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; mutex_lock(&ictx->lock); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { timer_setup(&ictx->ttimer, imon_touch_display_timeout, 0); } ictx->usbdev_intf1 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf1 = rx_urb; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) goto find_endpoint_failed; if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { ictx->touch = imon_init_touch(ictx); if (!ictx->touch) goto touch_setup_failed; } else ictx->touch = NULL; usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf1, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf1 (%d)\n", ret); goto urb_submit_failed; } ictx->dev_present_intf1 = true; mutex_unlock(&ictx->lock); return ictx; urb_submit_failed: if (ictx->touch) input_unregister_device(ictx->touch); touch_setup_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf1); ictx->usbdev_intf1 = NULL; mutex_unlock(&ictx->lock); usb_free_urb(rx_urb); ictx->rx_urb_intf1 = NULL; rx_urb_alloc_failed: dev_err(ictx->dev, "unable to initialize intf1, err %d\n", ret); return NULL; } static void imon_init_display(struct imon_context *ictx, struct usb_interface *intf) { int ret; dev_dbg(ictx->dev, "Registering iMON display with sysfs\n"); /* set up sysfs entry for built-in clock */ ret = sysfs_create_group(&intf->dev.kobj, &imon_display_attr_group); if (ret) dev_err(ictx->dev, "Could not create display sysfs entries(%d)", ret); if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) ret = usb_register_dev(intf, &imon_lcd_class); else ret = usb_register_dev(intf, &imon_vfd_class); if (ret) /* Not a fatal error, so ignore */ dev_info(ictx->dev, "could not get a minor number for display\n"); } /* * Callback function for USB core API: Probe */ static int imon_probe(struct usb_interface 
*interface, const struct usb_device_id *id) { struct usb_device *usbdev = NULL; struct usb_host_interface *iface_desc = NULL; struct usb_interface *first_if; struct device *dev = &interface->dev; int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); iface_desc = interface->cur_altsetting; ifnum = iface_desc->desc.bInterfaceNumber; vendor = le16_to_cpu(usbdev->descriptor.idVendor); product = le16_to_cpu(usbdev->descriptor.idProduct); dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } if (first_if->dev.driver != interface->dev.driver) { dev_err(&interface->dev, "inconsistent driver matching\n"); ret = -EINVAL; goto fail; } if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { pr_err("failed to initialize context!\n"); ret = -ENODEV; goto fail; } refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { ret = -ENODEV; goto fail; } ictx = imon_init_intf1(interface, first_if_ctx); if (!ictx) { pr_err("failed to attach to context!\n"); ret = -ENODEV; goto fail; } refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); if (sysfs_err) pr_err("Could not create RF sysfs entries(%d)\n", sysfs_err); } if (ictx->display_supported) imon_init_display(ictx, interface); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); usb_put_dev(usbdev); return 0; fail: usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); return ret; } /* * Callback function for USB core API: disconnect */ static void imon_disconnect(struct usb_interface *interface) { struct imon_context *ictx; struct device *dev; int ifnum; ictx = usb_get_intfdata(interface); ictx->disconnected = true; dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; /* * sysfs_remove_group is safe to call even if sysfs_create_group * hasn't been called */ sysfs_remove_group(&interface->dev.kobj, &imon_display_attr_group); sysfs_remove_group(&interface->dev.kobj, &imon_rf_attr_group); usb_set_intfdata(interface, NULL); /* Abort ongoing write */ if (ictx->tx.busy) { usb_kill_urb(ictx->tx_urb); complete(&ictx->tx.finished); } if (ifnum == 0) { ictx->dev_present_intf0 = false; usb_kill_urb(ictx->rx_urb_intf0); input_unregister_device(ictx->idev); rc_unregister_device(ictx->rdev); if (ictx->display_supported) { if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) usb_deregister_dev(interface, &imon_lcd_class); else if (ictx->display_type == IMON_DISPLAY_TYPE_VFD) usb_deregister_dev(interface, &imon_vfd_class); } usb_put_dev(ictx->usbdev_intf0); } else { ictx->dev_present_intf1 = false; usb_kill_urb(ictx->rx_urb_intf1); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { del_timer_sync(&ictx->ttimer); input_unregister_device(ictx->touch); } usb_put_dev(ictx->usbdev_intf1); } if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } static int imon_suspend(struct usb_interface *intf, pm_message_t message) { struct imon_context 
*ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) usb_kill_urb(ictx->rx_urb_intf0); else usb_kill_urb(ictx->rx_urb_intf1); return 0; } static int imon_resume(struct usb_interface *intf) { int rc = 0; struct imon_context *ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) { usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf0, GFP_NOIO); } else { usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf1, GFP_NOIO); } return rc; } module_usb_driver(imon_driver);
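/*
 * Editorial example, not part of the driver: a minimal user-space sketch of
 * the display write interface implemented by vfd_write() above. It assumes
 * the device node is /dev/lcd0 (the name the comments above use as an
 * example); the actual node name and minor number can differ. vfd_write()
 * accepts 1..32 bytes of text and pads the rest with spaces, while the LCD
 * variant (lcd_write()) instead expects exactly 8 bytes per write.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "Hello, iMON";	/* at most 32 bytes of text */
	int fd = open("/dev/lcd0", O_WRONLY);	/* node name is an assumption */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* One write() per update; the driver splits it into 8-byte USB packets. */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}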
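/*
 * Editorial example, not from the kernel sources: a user-space sketch of the
 * PI-futex protocol whose slow path is implemented by the futex PI code that
 * follows. The helper names (pi_lock, pi_unlock, futex_word) are invented for
 * illustration. The fast path is an atomic 0 -> TID transition done entirely
 * in user space; only on contention does a thread fall into FUTEX_LOCK_PI,
 * where futex_lock_pi() and futex_lock_pi_atomic() below take over and set
 * FUTEX_WAITERS.
 */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <stdatomic.h>
#include <sys/syscall.h>
#include <unistd.h>

static _Atomic unsigned int futex_word;	/* 0 = unlocked, else owner TID plus flag bits */

void pi_lock(void)
{
	unsigned int tid = (unsigned int)syscall(SYS_gettid);
	unsigned int expected = 0;

	/* Uncontended fast path: 0 -> TID, no kernel involvement. */
	if (atomic_compare_exchange_strong(&futex_word, &expected, tid))
		return;
	/* Contended: the kernel queues us and applies priority inheritance. */
	syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

void pi_unlock(void)
{
	unsigned int tid = (unsigned int)syscall(SYS_gettid);
	unsigned int expected = tid;

	/* Fast path: TID -> 0 succeeds only if no waiter/owner-died bits are set. */
	if (atomic_compare_exchange_strong(&futex_word, &expected, 0))
		return;
	/* FUTEX_WAITERS is set: let the kernel hand the lock to the top waiter. */
	syscall(SYS_futex, &futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}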
// SPDX-License-Identifier: GPL-2.0-or-later #include <linux/slab.h> #include <linux/sched/rt.h> #include <linux/sched/task.h> #include "futex.h" #include "../locking/rtmutex_common.h" /* * PI code: */ int refill_pi_state_cache(void) { struct futex_pi_state *pi_state; if (likely(current->pi_state_cache)) return 0; pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); if (!pi_state) return -ENOMEM; INIT_LIST_HEAD(&pi_state->list); /* pi_mutex gets initialized later */ pi_state->owner = NULL; refcount_set(&pi_state->refcount, 1); pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; return 0; } static struct futex_pi_state *alloc_pi_state(void) { struct futex_pi_state *pi_state = current->pi_state_cache; WARN_ON(!pi_state); current->pi_state_cache = NULL; return pi_state; } static void pi_state_update_owner(struct futex_pi_state *pi_state, struct task_struct *new_owner) { struct task_struct *old_owner = pi_state->owner; lockdep_assert_held(&pi_state->pi_mutex.wait_lock); if (old_owner) { raw_spin_lock(&old_owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); raw_spin_unlock(&old_owner->pi_lock); } if (new_owner) { raw_spin_lock(&new_owner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &new_owner->pi_state_list); pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); } } void get_pi_state(struct futex_pi_state *pi_state) { WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); } /* * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone.
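 * (Editorial note: together with refill_pi_state_cache() and alloc_pi_state()
 * above, this keeps at most one pre-allocated pi_state per task in
 * current->pi_state_cache, so a typical lock/unlock cycle reuses that slot
 * instead of going through kzalloc()/kfree() each time.)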
*/ void put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) return; if (!refcount_dec_and_test(&pi_state->refcount)) return; /* * If pi_state->owner is NULL, the owner is most probably dying * and has cleaned up the pi_state already */ if (pi_state->owner) { unsigned long flags; raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex); raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } if (current->pi_state_cache) { kfree(pi_state); } else { /* * pi_state->list is already empty. * clear pi_state->owner. * refcount is at 0 - put it back to 1. */ pi_state->owner = NULL; refcount_set(&pi_state->refcount, 1); current->pi_state_cache = pi_state; } } /* * We need to check the following states: * * Waiter | pi_state | pi->owner | uTID | uODIED | ? * * [1] NULL | --- | --- | 0 | 0/1 | Valid * [2] NULL | --- | --- | >0 | 0/1 | Valid * * [3] Found | NULL | -- | Any | 0/1 | Invalid * * [4] Found | Found | NULL | 0 | 1 | Valid * [5] Found | Found | NULL | >0 | 1 | Invalid * * [6] Found | Found | task | 0 | 1 | Valid * * [7] Found | Found | NULL | Any | 0 | Invalid * * [8] Found | Found | task | ==taskTID | 0/1 | Valid * [9] Found | Found | task | 0 | 0 | Invalid * [10] Found | Found | task | !=taskTID | 0/1 | Invalid * * [1] Indicates that the kernel can acquire the futex atomically. We * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. * * [2] Valid, if TID does not belong to a kernel thread. If no matching * thread is found then it indicates that the owner TID has died. * * [3] Invalid. The waiter is queued on a non PI futex * * [4] Valid state after exit_robust_list(), which sets the user space * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. * * [5] The user space value got manipulated between exit_robust_list() * and exit_pi_state_list() * * [6] Valid state after exit_pi_state_list() which sets the new owner in * the pi_state but cannot access the user space value. * * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. * * [8] Owner and user space value match * * [9] There is no transient state which sets the user space TID to 0 * except exit_robust_list(), but this is indicated by the * FUTEX_OWNER_DIED bit. See [4] * * [10] There is no transient state which leaves owner and user space * TID out of sync. Except one error case where the kernel is denied * write access to the user address, see fixup_pi_state_owner(). * * * Serialization and lifetime rules: * * hb->lock: * * hb -> futex_q, relation * futex_q -> pi_state, relation * * (cannot be raw because hb can contain arbitrary amount * of futex_q's) * * pi_mutex->wait_lock: * * {uval, pi_state} * * (and pi_mutex 'obviously') * * p->pi_lock: * * p->pi_state_list -> pi_state->list, relation * pi_mutex->owner -> pi_state->owner, relation * * pi_state->refcount: * * pi_state lifetime * * * Lock order: * * hb->lock * pi_mutex->wait_lock * p->pi_lock * */ /* * Validate that the existing waiter has a pi_state and sanity check * the pi_state against the user space value. If correct, attach to * it. */ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; u32 uval2; int ret; /* * Userspace might have messed up non-PI and PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; /* * We get here with hb->lock held, and having found a * futex_top_waiter(). 
This means that futex_lock_pi() of said futex_q * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(), * which in turn means that futex_lock_pi() still has a reference on * our pi_state. * * The waiter holding a reference on @pi_state also protects against * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() * and futex_wait_requeue_pi() as it cannot go to 0 and consequently * free pi_state before we can take a reference ourselves. */ WARN_ON(!refcount_read(&pi_state->refcount)); /* * Now that we have a pi_state, we can acquire wait_lock * and do the state validation. */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* * Since {uval, pi_state} is serialized by wait_lock, and our current * uval was read without holding it, it can have changed. Verify it * still is what we expect it to be, otherwise retry the entire * operation. */ if (futex_get_value_locked(&uval2, uaddr)) goto out_efault; if (uval != uval2) goto out_eagain; /* * Handle the owner died case: */ if (uval & FUTEX_OWNER_DIED) { /* * exit_pi_state_list sets owner to NULL and wakes the * topmost waiter. The task which acquires the * pi_state->rt_mutex will fixup owner. */ if (!pi_state->owner) { /* * No pi state owner, but the user space TID * is not 0. Inconsistent state. [5] */ if (pid) goto out_einval; /* * Take a ref on the state and return success. [4] */ goto out_attach; } /* * If TID is 0, then either the dying owner has not * yet executed exit_pi_state_list() or some waiter * acquired the rtmutex in the pi state, but did not * yet fixup the TID in user space. * * Take a ref on the state and return success. [6] */ if (!pid) goto out_attach; } else { /* * If the owner died bit is not set, then the pi_state * must have an owner. [7] */ if (!pi_state->owner) goto out_einval; } /* * Bail out if user space manipulated the futex value. If pi * state exists then the owner TID must be the same as the * user space TID. [9/10] */ if (pid != task_pid_vnr(pi_state->owner)) goto out_einval; out_attach: get_pi_state(pi_state); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); *ps = pi_state; return 0; out_einval: ret = -EINVAL; goto out_error; out_eagain: ret = -EAGAIN; goto out_error; out_efault: ret = -EFAULT; goto out_error; out_error: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; } static int handle_exit_race(u32 __user *uaddr, u32 uval, struct task_struct *tsk) { u32 uval2; /* * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the * caller that the alleged owner is busy. */ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) return -EBUSY; /* * Reread the user space value to handle the following situation: * * CPU0 CPU1 * * sys_exit() sys_futex() * do_exit() futex_lock_pi() * futex_lock_pi_atomic() * exit_signals(tsk) No waiters: * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID * mm_release(tsk) Set waiter bit * exit_robust_list(tsk) { *uaddr = 0x80000PID; * Set owner died attach_to_pi_owner() { * *uaddr = 0xC0000000; tsk = get_task(PID); * } if (!tsk->flags & PF_EXITING) { * ... attach(); * tsk->futex_state = } else { * FUTEX_STATE_DEAD; if (tsk->futex_state != * FUTEX_STATE_DEAD) * return -EAGAIN; * return -ESRCH; <--- FAIL * } * * Returning ESRCH unconditionally is wrong here because the * user space value has been changed by the exiting task. * * The same logic applies to the case where the exiting task is * already gone. */ if (futex_get_value_locked(&uval2, uaddr)) return -EFAULT; /* If the user space value has changed, try again. 
*/ if (uval2 != uval) return -EAGAIN; /* * The exiting task did not have a robust list, the robust list was * corrupted or the user space value in *uaddr is simply bogus. * Give up and tell user space. */ return -ESRCH; } static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, struct futex_pi_state **ps) { /* * No existing pi state. First waiter. [2] * * This creates pi_state, we have hb->lock held, this means nothing can * observe this state, wait_lock is irrelevant. */ struct futex_pi_state *pi_state = alloc_pi_state(); /* * Initialize the pi_mutex in locked state and make @p * the owner of it: */ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); /* Store the key for possible exit cleanups: */ pi_state->key = *key; WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &p->pi_state_list); /* * Assignment without holding pi_state->pi_mutex.wait_lock is safe * because there is no concurrency as the object is not published yet. */ pi_state->owner = p; *ps = pi_state; } /* * Lookup the task for the TID provided from user space and attach to * it after doing proper sanity checks. */ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, struct futex_pi_state **ps, struct task_struct **exiting) { pid_t pid = uval & FUTEX_TID_MASK; struct task_struct *p; /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] * * The !pid check is paranoid. None of the call sites should end up * with pid == 0, but better safe than sorry. Let the caller retry */ if (!pid) return -EAGAIN; p = find_get_task_by_vpid(pid); if (!p) return handle_exit_race(uaddr, uval, NULL); if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); return -EPERM; } /* * We need to look at the task state to figure out, whether the * task is exiting. To protect against the change of the task state * in futex_exit_release(), we do this protected by p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); if (unlikely(p->futex_state != FUTEX_STATE_OK)) { /* * The task is on the way out. When the futex state is * FUTEX_STATE_DEAD, we know that the task has finished * the cleanup: */ int ret = handle_exit_race(uaddr, uval, p); raw_spin_unlock_irq(&p->pi_lock); /* * If the owner task is between FUTEX_STATE_EXITING and * FUTEX_STATE_DEAD then store the task pointer and keep * the reference on the task struct. The calling code will * drop all locks, wait for the task to reach * FUTEX_STATE_DEAD and then drop the refcount. This is * required to prevent a live lock when the current task * preempted the exiting task between the two states. */ if (ret == -EBUSY) *exiting = p; else put_task_struct(p); return ret; } __attach_to_pi_owner(p, key, ps); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); return 0; } static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { int err; u32 curval; if (unlikely(should_fail_futex(true))) return -EFAULT; err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval); if (unlikely(err)) return err; /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; } /** * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex * @uaddr: the pi futex user address * @hb: the pi futex hash bucket * @key: the futex key associated with uaddr and hb * @ps: the pi_state pointer where we store the result of the * lookup * @task: the task to perform the atomic lock work for. This will * be "current" except in the case of requeue pi. 
* @exiting: Pointer to store the task pointer of the owner task * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Return: * - 0 - ready to wait; * - 1 - acquired the lock; * - <0 - error * * The hb->lock must be held by the caller. * * @exiting is only set when the return value is -EBUSY. If so, this holds * a refcount on the exiting task on return and the caller needs to drop it * after waiting for the exit to complete. */ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, struct task_struct *task, struct task_struct **exiting, int set_waiters) { u32 uval, newval, vpid = task_pid_vnr(task); struct futex_q *top_waiter; int ret; /* * Read the user space value first so we can validate a few * things before proceeding further. */ if (futex_get_value_locked(&uval, uaddr)) return -EFAULT; if (unlikely(should_fail_futex(true))) return -EFAULT; /* * Detect deadlocks. */ if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; if ((unlikely(should_fail_futex(true)))) return -EDEADLK; /* * Lookup existing state first. If it exists, try to attach to * its pi_state. */ top_waiter = futex_top_waiter(hb, key); if (top_waiter) return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); /* * No waiter and user TID is 0. We are here because the * waiters or the owner died bit is set or called from * requeue_cmp_pi or for whatever reason something took the * syscall. */ if (!(uval & FUTEX_TID_MASK)) { /* * We take over the futex. No other waiters and the user space * TID is 0. We preserve the owner died bit. */ newval = uval & FUTEX_OWNER_DIED; newval |= vpid; /* The futex requeue_pi code can enforce the waiters bit */ if (set_waiters) newval |= FUTEX_WAITERS; ret = lock_pi_update_atomic(uaddr, uval, newval); if (ret) return ret; /* * If the waiter bit was requested the caller also needs PI * state attached to the new owner of the user space futex. * * @task is guaranteed to be alive and it cannot be exiting * because it is either sleeping or waiting in * futex_requeue_pi_wakeup_sync(). * * No need to do the full attach_to_pi_owner() exercise * because @task is known and valid. */ if (set_waiters) { raw_spin_lock_irq(&task->pi_lock); __attach_to_pi_owner(task, key, ps); raw_spin_unlock_irq(&task->pi_lock); } return 1; } /* * First waiter. Set the waiters bit before attaching ourself to * the owner. If owner tries to unlock, it will be forced into * the kernel and blocked on hb->lock. */ newval = uval | FUTEX_WAITERS; ret = lock_pi_update_atomic(uaddr, uval, newval); if (ret) return ret; /* * If the update of the user space value succeeded, we try to * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); } /* * Caller must hold a reference on @pi_state. */ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state, struct rt_mutex_waiter *top_waiter) { struct task_struct *new_owner; bool postunlock = false; DEFINE_RT_WAKE_Q(wqh); u32 curval, newval; int ret = 0; new_owner = top_waiter->task; /* * We pass it to the next owner. The WAITERS bit is always kept * enabled while there is PI state around. We cleanup the owner * died bit, because we are the owner. 
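 * (Editorial illustration with made-up TIDs: if the futex word currently
 * reads 0x80000457 -- FUTEX_WAITERS plus owner TID 0x457 -- and the top
 * waiter's TID is 0x8ae, the cmpxchg below writes 0x800008ae. FUTEX_WAITERS
 * (0x80000000) stays set because PI state still exists, while
 * FUTEX_OWNER_DIED (0x40000000) is not carried over since newval is built
 * from scratch.)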
*/ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); if (unlikely(should_fail_futex(true))) { ret = -EFAULT; goto out_unlock; } ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval); if (!ret && (curval != uval)) { /* * If a unconditional UNLOCK_PI operation (user space did not * try the TID->0 transition) raced with a waiter setting the * FUTEX_WAITERS flag between get_user() and locking the hash * bucket lock, retry the operation. */ if ((FUTEX_TID_MASK & curval) == uval) ret = -EAGAIN; else ret = -EINVAL; } if (!ret) { /* * This is a point of no return; once we modified the uval * there is no going back and subsequent operations must * not fail. */ pi_state_update_owner(pi_state, new_owner); postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); } out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); if (postunlock) rt_mutex_postunlock(&wqh); return ret; } static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, struct task_struct *argowner) { struct futex_pi_state *pi_state = q->pi_state; struct task_struct *oldowner, *newowner; u32 uval, curval, newval, newtid; int err = 0; oldowner = pi_state->owner; /* * We are here because either: * * - we stole the lock and pi_state->owner needs updating to reflect * that (@argowner == current), * * or: * * - someone stole our lock and we need to fix things to point to the * new owner (@argowner == NULL). * * Either way, we have to replace the TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. * * Note: We write the user space value _before_ changing the pi_state * because we can fault here. Imagine swapped out pages or a fork * that marked all the anonymous memory readonly for cow. * * Modifying pi_state _before_ the user space value would leave the * pi_state in an inconsistent state when we fault here, because we * need to drop the locks to handle the fault. This might be observed * in the PID checks when attaching to PI state . */ retry: if (!argowner) { if (oldowner != current) { /* * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ return 0; } if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { /* We got the lock. pi_state is correct. Tell caller. */ return 1; } /* * The trylock just failed, so either there is an owner or * there is a higher priority waiter than this one. */ newowner = rt_mutex_owner(&pi_state->pi_mutex); /* * If the higher priority waiter has not yet taken over the * rtmutex then newowner is NULL. We can't return here with * that state because it's inconsistent vs. the user space * state. So drop the locks and try again. It's a valid * situation and not any different from the other retry * conditions. */ if (unlikely(!newowner)) { err = -EAGAIN; goto handle_err; } } else { WARN_ON_ONCE(argowner != current); if (oldowner == current) { /* * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ return 1; } newowner = argowner; } newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; /* Owner died? */ if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; err = futex_get_value_locked(&uval, uaddr); if (err) goto handle_err; for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval); if (err) goto handle_err; if (curval == uval) break; uval = curval; } /* * We fixed up user space. Now we need to fix the pi_state * itself. 
*/ pi_state_update_owner(pi_state, newowner); return argowner == current; /* * In order to reschedule or handle a page fault, we need to drop the * locks here. In the case of a fault, this gives the other task * (either the highest priority waiter itself or the task which stole * the rtmutex) the chance to try the fixup of the pi_state. So once we * are back from handling the fault we need to check the pi_state after * reacquiring the locks and before trying to do another fixup. When * the fixup has been done already we simply return. * * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely * drop hb->lock since the caller owns the hb -> futex_q relation. * Dropping the pi_mutex->wait_lock requires the state revalidate. */ handle_err: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); switch (err) { case -EFAULT: err = fault_in_user_writeable(uaddr); break; case -EAGAIN: cond_resched(); err = 0; break; default: WARN_ON_ONCE(1); break; } spin_lock(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* * Check if someone else fixed it for us: */ if (pi_state->owner != oldowner) return argowner == current; /* Retry if err was -EAGAIN or the fault in succeeded */ if (!err) goto retry; /* * fault_in_user_writeable() failed so user state is immutable. At * best we can make the kernel state consistent but user state will * be most likely hosed and any subsequent unlock operation will be * rejected due to PI futex rule [10]. * * Ensure that the rtmutex owner is also the pi_state owner despite * the user space value claiming something different. There is no * point in unlocking the rtmutex if current is the owner as it * would need to wait until the next waiter has taken the rtmutex * to guarantee consistent state. Keep it simple. Userspace asked * for this wreckaged state. * * The rtmutex has an owner - either current or some other * task. See the EAGAIN loop above. */ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); return err; } static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, struct task_struct *argowner) { struct futex_pi_state *pi_state = q->pi_state; int ret; lockdep_assert_held(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ret = __fixup_pi_state_owner(uaddr, q, argowner); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; } /** * fixup_pi_owner() - Post lock pi_state and corner case management * @uaddr: user address of the futex * @q: futex_q (contains pi_state and access to the rt_mutex) * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) * * After attempting to lock an rt_mutex, this function is called to cleanup * the pi_state owner as well as handle race conditions that may allow us to * acquire the lock. Must be called with the hb lock held. * * Return: * - 1 - success, lock taken; * - 0 - success, lock not taken; * - <0 - on error (-EFAULT) */ int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked) { if (locked) { /* * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that case: * * Speculative pi_state->owner read (we don't hold wait_lock); * since we own the lock pi_state->owner == current is the * stable state, anything else needs more attention. */ if (q->pi_state->owner != current) return fixup_pi_state_owner(uaddr, q, current); return 1; } /* * If we didn't get the lock; check if anybody stole it from us. 
In * that case, we need to fix up the uval to point to them instead of * us, otherwise bad things happen. [10] * * Another speculative read; pi_state->owner == current is unstable * but needs our attention. */ if (q->pi_state->owner == current) return fixup_pi_state_owner(uaddr, q, NULL); /* * Paranoia check. If we did not take the lock, then we should not be * the owner of the rt_mutex. Warn and establish consistent state. */ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) return fixup_pi_state_owner(uaddr, q, current); return 0; } /* * Userspace tried a 0 -> TID atomic transition of the futex value * and failed. The kernel side here does the whole locking operation: * if there are waiters then it will block as a consequence of relying * on rt-mutexes, it does PI, etc. (Due to races the kernel might see * a 0 value of the futex too.). * * Also serves as futex trylock_pi()'ing, and due semantics. */ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; struct task_struct *exiting = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; int res, ret; if (!IS_ENABLED(CONFIG_FUTEX_PI)) return -ENOSYS; if (refill_pi_state_cache()) return -ENOMEM; to = futex_setup_timer(time, &timeout, flags, 0); retry: ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; retry_private: hb = futex_q_lock(&q); ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, &exiting, 0); if (unlikely(ret)) { /* * Atomic work succeeded and we got the lock, * or failed. Either way, we do _not_ block. */ switch (ret) { case 1: /* We got the lock. */ ret = 0; goto out_unlock_put_key; case -EFAULT: goto uaddr_faulted; case -EBUSY: case -EAGAIN: /* * Two reasons for this: * - EBUSY: Task is exiting and we just wait for the * exit to complete. * - EAGAIN: The user space value changed. */ futex_q_unlock(hb); /* * Handle the case where the owner is in the middle of * exiting. Wait for the exit to complete otherwise * this task might loop forever, aka. live lock. */ wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: goto out_unlock_put_key; } } WARN_ON(!q.pi_state); /* * Only actually queue now that the atomic ops are done: */ __futex_queue(&q, hb); if (trylock) { ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); /* Fixup the trylock return value: */ ret = ret ? 0 : -EWOULDBLOCK; goto no_block; } /* * Must be done before we enqueue the waiter, here is unfortunately * under the hb lock, but that *should* work because it does nothing. */ rt_mutex_pre_schedule(); rt_mutex_init_waiter(&rt_waiter); /* * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not * hold it while doing rt_mutex_start_proxy(), because then it will * include hb->lock in the blocking chain, even through we'll not in * fact hold it while blocking. This will lead it to report -EDEADLK * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the * latter before calling __rt_mutex_start_proxy_lock(). This * interleaves with futex_unlock_pi() -- which does a similar lock * handoff -- such that the latter can observe the futex_q::pi_state * before __rt_mutex_start_proxy_lock() is done. 
*/ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); /* * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter * such that futex_unlock_pi() is guaranteed to observe the waiter when * it sees the futex_q::pi_state. */ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; goto cleanup; } if (unlikely(to)) hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); cleanup: /* * If we failed to acquire the lock (deadlock/signal/timeout), we must * must unwind the above, however we canont lock hb->lock because * rt_mutex already has a waiter enqueued and hb->lock can itself try * and enqueue an rt_waiter through rtlock. * * Doing the cleanup without holding hb->lock can cause inconsistent * state between hb and pi_state, but only in the direction of not * seeing a waiter that is leaving. * * See futex_unlock_pi(), it deals with this inconsistency. * * There be dragons here, since we must deal with the inconsistency on * the way out (here), it is impossible to detect/warn about the race * the other way around (missing an incoming waiter). * * What could possibly go wrong... */ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) ret = 0; /* * Now that the rt_waiter has been dequeued, it is safe to use * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up * the */ spin_lock(q.lock_ptr); /* * Waiter is unqueued. */ rt_mutex_post_schedule(); no_block: /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ res = fixup_pi_owner(uaddr, &q, !ret); /* * If fixup_pi_owner() returned an error, propagate that. If it acquired * the lock, clear our -ETIMEDOUT or -EINTR. */ if (res) ret = (res < 0) ? res : 0; futex_unqueue_pi(&q); spin_unlock(q.lock_ptr); goto out; out_unlock_put_key: futex_q_unlock(hb); out: if (to) { hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); } return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: futex_q_unlock(hb); ret = fault_in_user_writeable(uaddr); if (ret) goto out; if (!(flags & FLAGS_SHARED)) goto retry_private; goto retry; } /* * Userspace attempted a TID -> 0 atomic transition, and failed. * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { u32 curval, uval, vpid = task_pid_vnr(current); union futex_key key = FUTEX_KEY_INIT; struct futex_hash_bucket *hb; struct futex_q *top_waiter; int ret; if (!IS_ENABLED(CONFIG_FUTEX_PI)) return -ENOSYS; retry: if (get_user(uval, uaddr)) return -EFAULT; /* * We release only a lock we actually own: */ if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; ret = get_futex_key(uaddr, flags, &key, FUTEX_WRITE); if (ret) return ret; hb = futex_hash(&key); spin_lock(&hb->lock); /* * Check waiters first. We do not trust user space values at * all and we at least want to know if user space fiddled * with the futex value instead of blindly unlocking. */ top_waiter = futex_top_waiter(hb, &key); if (top_waiter) { struct futex_pi_state *pi_state = top_waiter->pi_state; struct rt_mutex_waiter *rt_waiter; ret = -EINVAL; if (!pi_state) goto out_unlock; /* * If current does not own the pi_state then the futex is * inconsistent and user space fiddled with the futex value. 
*/ if (pi_state->owner != current) goto out_unlock; /* * By taking wait_lock while still holding hb->lock, we ensure * there is no point where we hold neither; and thereby * wake_futex_pi() must observe any new waiters. * * Since the cleanup: case in futex_lock_pi() removes the * rt_waiter without holding hb->lock, it is possible for * wake_futex_pi() to not find a waiter while the above does, * in this case the waiter is on the way out and it can be * ignored. * * In particular; this forces __rt_mutex_start_proxy() to * complete such that we're guaranteed to observe the * rt_waiter. */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* * Futex vs rt_mutex waiter state -- if there are no rt_mutex * waiters even though futex thinks there are, then the waiter * is leaving and the uncontended path is safe to take. */ rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); if (!rt_waiter) { raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); goto do_uncontended; } get_pi_state(pi_state); spin_unlock(&hb->lock); /* drops pi_state->pi_mutex.wait_lock */ ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter); put_pi_state(pi_state); /* * Success, we're done! No tricky corner cases. */ if (!ret) return ret; /* * The atomic access to the futex value generated a * pagefault, so retry the user-access and the wakeup: */ if (ret == -EFAULT) goto pi_faulted; /* * A unconditional UNLOCK_PI op raced against a waiter * setting the FUTEX_WAITERS bit. Try again. */ if (ret == -EAGAIN) goto pi_retry; /* * wake_futex_pi has detected invalid state. Tell user * space. */ return ret; } do_uncontended: /* * We have no kernel internal state, i.e. no waiters in the * kernel. Waiters which are about to queue themselves are stuck * on hb->lock. So we can safely ignore them. We do neither * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. */ if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) { spin_unlock(&hb->lock); switch (ret) { case -EFAULT: goto pi_faulted; case -EAGAIN: goto pi_retry; default: WARN_ON_ONCE(1); return ret; } } /* * If uval has changed, let user space handle it. */ ret = (curval == uval) ? 0 : -EAGAIN; out_unlock: spin_unlock(&hb->lock); return ret; pi_retry: cond_resched(); goto retry; pi_faulted: ret = fault_in_user_writeable(uaddr); if (!ret) goto retry; return ret; } |
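The slow paths above only run once userspace has lost the races described in the comments (the 0 -> TID lock attempt and the TID -> 0 unlock attempt). A minimal userspace sketch of those fast paths, assuming the standard futex(2) interface; the pi_lock()/pi_unlock() helpers are hypothetical and retry loops plus error handling are omitted:

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

/* Lock: try the userspace 0 -> TID transition, else let the kernel do PI. */
static void pi_lock(uint32_t *futex_word)
{
        uint32_t expected = 0;
        uint32_t tid = (uint32_t)syscall(SYS_gettid);

        if (__atomic_compare_exchange_n(futex_word, &expected, tid, 0,
                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                return;         /* uncontended: we now own the futex */

        /* Contended (or owner-died bit set): futex_lock_pi() takes over. */
        syscall(SYS_futex, futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

/* Unlock: try TID -> 0; if FUTEX_WAITERS is set the value is not plain TID,
 * the CAS fails, and the kernel hands the futex to the top waiter via
 * futex_unlock_pi(). */
static void pi_unlock(uint32_t *futex_word)
{
        uint32_t expected = (uint32_t)syscall(SYS_gettid);

        if (__atomic_compare_exchange_n(futex_word, &expected, 0, 0,
                                        __ATOMIC_RELEASE, __ATOMIC_RELAXED))
                return;         /* no waiters: uncontended release */

        syscall(SYS_futex, futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}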
// SPDX-License-Identifier: GPL-2.0-only /* * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2011 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_nat.h> static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (mr->rangesize != 1) { pr_info_ratelimited("multiple ranges no longer supported\n"); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); } static int xt_nat_checkentry(const struct xt_tgchk_param *par) { return nf_ct_netns_get(par->net, par->family); } static void xt_nat_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static void xt_nat_convert_range(struct nf_nat_range2 *dst, const struct nf_nat_ipv4_range *src) { memset(&dst->min_addr, 0, sizeof(dst->min_addr)); memset(&dst->max_addr, 0, sizeof(dst->max_addr)); memset(&dst->base_proto, 0, sizeof(dst->base_proto)); dst->flags = src->flags; dst->min_addr.ip = src->min_ip; dst->max_addr.ip = src->max_ip; dst->min_proto = src->min; dst->max_proto = src->max; } static unsigned int xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range2 range; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); xt_nat_convert_range(&range, &mr->range[0]); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } static unsigned int xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range2 range; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); xt_nat_convert_range(&range, &mr->range[0]); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } static unsigned int xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_range *range_v1 = par->targinfo; struct nf_nat_range2 range; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); memcpy(&range, range_v1, sizeof(*range_v1)); memset(&range.base_proto,
0, sizeof(range.base_proto)); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } static unsigned int xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_range *range_v1 = par->targinfo; struct nf_nat_range2 range; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); memcpy(&range, range_v1, sizeof(*range_v1)); memset(&range.base_proto, 0, sizeof(range.base_proto)); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } static unsigned int xt_snat_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_range2 *range = par->targinfo; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); } static unsigned int xt_dnat_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_range2 *range = par->targinfo; enum ip_conntrack_info ctinfo; struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct != NULL && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST); } static struct xt_target xt_nat_target_reg[] __read_mostly = { { .name = "SNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, .destroy = xt_nat_destroy, .target = xt_snat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, .table = "nat", .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, { .name = "DNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, .destroy = xt_nat_destroy, .target = xt_dnat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, { .name = "SNAT", .revision = 1, .checkentry = xt_nat_checkentry, .destroy = xt_nat_destroy, .target = xt_snat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, { .name = "DNAT", .revision = 1, .checkentry = xt_nat_checkentry, .destroy = xt_nat_destroy, .target = xt_dnat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, { .name = "SNAT", .revision = 2, .checkentry = xt_nat_checkentry, .destroy = xt_nat_destroy, .target = xt_snat_target_v2, .targetsize = sizeof(struct nf_nat_range2), .table = "nat", .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, { .name = "DNAT", .revision = 2, .checkentry = xt_nat_checkentry, .destroy = xt_nat_destroy, .target = xt_dnat_target_v2, .targetsize = sizeof(struct nf_nat_range2), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init xt_nat_init(void) { return xt_register_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg)); } static void __exit xt_nat_exit(void) { xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg)); } module_init(xt_nat_init); module_exit(xt_nat_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); MODULE_ALIAS("ip6t_SNAT"); 
MODULE_ALIAS("ip6t_DNAT"); MODULE_DESCRIPTION("SNAT and DNAT targets support"); |
/* SPDX-License-Identifier: GPL-2.0 */ /* * Generic RTC interface. * This version contains the part of the user interface to the Real Time Clock * service. It is used with both the legacy mc146818 and also EFI * Struct rtc_time and first 12 ioctl by Paul Gortmaker, 1996 - separated out * from <linux/mc146818rtc.h> to this file for 2.4 kernels. * * Copyright (C) 1999 Hewlett-Packard Co. * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> */ #ifndef _LINUX_RTC_H_ #define _LINUX_RTC_H_ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/nvmem-provider.h> #include <uapi/linux/rtc.h> extern int rtc_month_days(unsigned int month, unsigned int year); extern int rtc_year_days(unsigned int day, unsigned int month, unsigned int year); extern int rtc_valid_tm(struct rtc_time *tm); extern time64_t rtc_tm_to_time64(struct rtc_time *tm); extern void rtc_time64_to_tm(time64_t time, struct rtc_time *tm); ktime_t rtc_tm_to_ktime(struct rtc_time tm); struct rtc_time rtc_ktime_to_tm(ktime_t kt); /* * rtc_tm_sub - Return the difference in seconds. */ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) { return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); } #include <linux/device.h> #include <linux/seq_file.h> #include <linux/cdev.h> #include <linux/poll.h> #include <linux/mutex.h> #include <linux/timerqueue.h> #include <linux/workqueue.h> extern struct class *rtc_class; /* * For these RTC methods the device parameter is the physical device * on whatever bus holds the hardware (I2C, Platform, SPI, etc), which * was passed to rtc_device_register(). Its driver_data normally holds * device state, including the rtc_device pointer for the RTC. * * Most of these methods are called with rtc_device.ops_lock held, * through the rtc_*(struct rtc_device *, ...) calls.
* * The (current) exceptions are mostly filesystem hooks: * - the proc() hook for procfs */ struct rtc_class_ops { int (*ioctl)(struct device *, unsigned int, unsigned long); int (*read_time)(struct device *, struct rtc_time *); int (*set_time)(struct device *, struct rtc_time *); int (*read_alarm)(struct device *, struct rtc_wkalrm *); int (*set_alarm)(struct device *, struct rtc_wkalrm *); int (*proc)(struct device *, struct seq_file *); int (*alarm_irq_enable)(struct device *, unsigned int enabled); int (*read_offset)(struct device *, long *offset); int (*set_offset)(struct device *, long offset); int (*param_get)(struct device *, struct rtc_param *param); int (*param_set)(struct device *, struct rtc_param *param); }; struct rtc_device; struct rtc_timer { struct timerqueue_node node; ktime_t period; void (*func)(struct rtc_device *rtc); struct rtc_device *rtc; int enabled; }; /* flags */ #define RTC_DEV_BUSY 0 #define RTC_NO_CDEV 1 struct rtc_device { struct device dev; struct module *owner; int id; const struct rtc_class_ops *ops; struct mutex ops_lock; struct cdev char_dev; unsigned long flags; unsigned long irq_data; spinlock_t irq_lock; wait_queue_head_t irq_queue; struct fasync_struct *async_queue; int irq_freq; int max_user_freq; struct timerqueue_head timerqueue; struct rtc_timer aie_timer; struct rtc_timer uie_rtctimer; struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */ int pie_enabled; struct work_struct irqwork; /* * This offset specifies the update timing of the RTC. * * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds * * The offset defines how tsched is computed so that the write to * the RTC (t2.tv_sec - 1sec) is correct versus the time required * for the transport of the write and the time which the RTC needs * to increment seconds the first time after the write (t2). * * For direct accessible RTCs tsched ~= t1 because the write time * is negligible. For RTCs behind slow busses the transport time is * significant and has to be taken into account. * * The time between the write (t1) and the first increment after * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms, * for many others it's exactly 1 second. Consult the datasheet. * * The value of this offset is also used to calculate the to be * written value (t2.tv_sec - 1sec) at tsched. * * The default value for this is NSEC_PER_SEC + 10 msec default * transport time. 
The offset can be adjusted by drivers so the * calculation for the to be written value at tsched becomes * correct: * * newval = tsched + set_offset_nsec - NSEC_PER_SEC * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0 */ unsigned long set_offset_nsec; unsigned long features[BITS_TO_LONGS(RTC_FEATURE_CNT)]; time64_t range_min; timeu64_t range_max; timeu64_t alarm_offset_max; time64_t start_secs; time64_t offset_secs; bool set_start_time; #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; /* Those fields are protected by rtc->irq_lock */ unsigned int oldsecs; unsigned int uie_irq_active:1; unsigned int stop_uie_polling:1; unsigned int uie_task_active:1; unsigned int uie_timer_active:1; #endif }; #define to_rtc_device(d) container_of(d, struct rtc_device, dev) #define rtc_lock(d) mutex_lock(&d->ops_lock) #define rtc_unlock(d) mutex_unlock(&d->ops_lock) /* useful timestamps */ #define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2079 3471292799LL /* 2079-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2099 4102444799LL /* 2099-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2199 7258118399LL /* 2199-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_9999 253402300799LL /* 9999-12-31 23:59:59 */ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const char *name, const struct rtc_class_ops *ops, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events); extern struct rtc_device *rtc_class_open(const char *name); extern void rtc_class_close(struct rtc_device *rtc); extern int rtc_irq_set_state(struct rtc_device *rtc, int enabled); extern int rtc_irq_set_freq(struct rtc_device *rtc, int freq); extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled); extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled); extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled); void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode); void rtc_aie_update_irq(struct rtc_device *rtc); void rtc_uie_update_irq(struct rtc_device *rtc); enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer); void rtc_timer_init(struct rtc_timer *timer, void (*f)(struct rtc_device *r), struct rtc_device *rtc); int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period); void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); int rtc_read_offset(struct rtc_device *rtc, long *offset); int rtc_set_offset(struct rtc_device *rtc, long offset); void rtc_timer_do_work(struct work_struct *work); static inline bool is_leap_year(unsigned int year) { 
return (!(year % 4) && (year % 100)) || !(year % 400); } /** * rtc_bound_alarmtime() - Return alarm time bound by rtc limit * @rtc: Pointer to rtc device structure * @requested: Requested alarm timeout * * Return: Alarm timeout bound by maximum alarm time supported by rtc. */ static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, ktime_t requested) { if (rtc->alarm_offset_max && rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); return requested; } #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; #else #define rtc_hctosys_ret -ENODEV #endif #ifdef CONFIG_RTC_NVMEM int devm_rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config); #else static inline int devm_rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { return 0; } #endif #ifdef CONFIG_RTC_INTF_SYSFS int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp); int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps); #else static inline int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp) { return 0; } static inline int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps) { return 0; } #endif #endif /* _LINUX_RTC_H_ */ |
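A minimal sketch of the registration flow this header exposes, assuming a platform device driver; the example_rtc_* names are hypothetical and hardware access is stubbed out:

#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>

static int example_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
        /* A real driver would read the hardware here; return a fixed time. */
        rtc_time64_to_tm(RTC_TIMESTAMP_BEGIN_2000, tm);
        return 0;
}

static int example_rtc_set_time(struct device *dev, struct rtc_time *tm)
{
        time64_t secs = rtc_tm_to_time64(tm);

        /* A real driver would program 'secs' into the hardware. */
        (void)secs;
        return 0;
}

static const struct rtc_class_ops example_rtc_ops = {
        .read_time = example_rtc_read_time,
        .set_time = example_rtc_set_time,
};

static int example_rtc_probe(struct platform_device *pdev)
{
        struct rtc_device *rtc;

        rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);

        /* The core serializes these ops via rtc_device.ops_lock. */
        rtc->ops = &example_rtc_ops;
        rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rtc->range_max = RTC_TIMESTAMP_END_2099;

        return devm_rtc_register_device(rtc);
}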
// SPDX-License-Identifier: GPL-2.0 /* * linux/mm/mempool.c * * memory buffer pool support. Such pools are mostly used * for guaranteed, deadlock-free memory allocations during * extreme VM load. * * started by Ingo Molnar, Copyright (C) 2001 * debugging by David Rientjes, Copyright (C) 2015 */ #include <linux/mm.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/kasan.h> #include <linux/kmemleak.h> #include <linux/export.h> #include <linux/mempool.h> #include <linux/writeback.h> #include "slab.h" #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON) static void poison_error(mempool_t *pool, void *element, size_t size, size_t byte) { const int nr = pool->curr_nr; const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0); const int end = min_t(int, byte + (BITS_PER_LONG / 8), size); int i; pr_err("BUG: mempool element poison mismatch\n"); pr_err("Mempool %p size %zu\n", pool, size); pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : ""); for (i = start; i < end; i++) pr_cont("%x ", *(u8 *)(element + i)); pr_cont("%s\n", end < size ? "..." : ""); dump_stack(); } static void __check_element(mempool_t *pool, void *element, size_t size) { u8 *obj = element; size_t i; for (i = 0; i < size; i++) { u8 exp = (i < size - 1) ?
POISON_FREE : POISON_END; if (obj[i] != exp) { poison_error(pool, element, size, i); return; } } memset(obj, POISON_INUSE, size); } static void check_element(mempool_t *pool, void *element) { /* Mempools backed by slab allocator */ if (pool->free == mempool_kfree) { __check_element(pool, element, (size_t)pool->pool_data); } else if (pool->free == mempool_free_slab) { __check_element(pool, element, kmem_cache_size(pool->pool_data)); } else if (pool->free == mempool_free_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; void *addr = kmap_atomic((struct page *)element); __check_element(pool, addr, 1UL << (PAGE_SHIFT + order)); kunmap_atomic(addr); } } static void __poison_element(void *element, size_t size) { u8 *obj = element; memset(obj, POISON_FREE, size - 1); obj[size - 1] = POISON_END; } static void poison_element(mempool_t *pool, void *element) { /* Mempools backed by slab allocator */ if (pool->alloc == mempool_kmalloc) { __poison_element(element, (size_t)pool->pool_data); } else if (pool->alloc == mempool_alloc_slab) { __poison_element(element, kmem_cache_size(pool->pool_data)); } else if (pool->alloc == mempool_alloc_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; void *addr = kmap_atomic((struct page *)element); __poison_element(addr, 1UL << (PAGE_SHIFT + order)); kunmap_atomic(addr); } } #else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ static inline void check_element(mempool_t *pool, void *element) { } static inline void poison_element(mempool_t *pool, void *element) { } #endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ static __always_inline void kasan_poison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) kasan_slab_free_mempool(element); else if (pool->alloc == mempool_alloc_pages) kasan_poison_pages(element, (unsigned long)pool->pool_data, false); } static void kasan_unpoison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_kmalloc) kasan_unpoison_range(element, (size_t)pool->pool_data); else if (pool->alloc == mempool_alloc_slab) kasan_unpoison_range(element, kmem_cache_size(pool->pool_data)); else if (pool->alloc == mempool_alloc_pages) kasan_unpoison_pages(element, (unsigned long)pool->pool_data, false); } static __always_inline void add_element(mempool_t *pool, void *element) { BUG_ON(pool->curr_nr >= pool->min_nr); poison_element(pool, element); kasan_poison_element(pool, element); pool->elements[pool->curr_nr++] = element; } static void *remove_element(mempool_t *pool) { void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); kasan_unpoison_element(pool, element); check_element(pool, element); return element; } /** * mempool_exit - exit a mempool initialized with mempool_init() * @pool: pointer to the memory pool which was initialized with * mempool_init(). * * Free all reserved elements in @pool and @pool itself. This function * only sleeps if the free_fn() function sleeps. * * May be called on a zeroed but uninitialized mempool (i.e. allocated with * kzalloc()). */ void mempool_exit(mempool_t *pool) { while (pool->curr_nr) { void *element = remove_element(pool); pool->free(element, pool->pool_data); } kfree(pool->elements); pool->elements = NULL; } EXPORT_SYMBOL(mempool_exit); /** * mempool_destroy - deallocate a memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * Free all reserved elements in @pool and @pool itself. 
This function * only sleeps if the free_fn() function sleeps. */ void mempool_destroy(mempool_t *pool) { if (unlikely(!pool)) return; mempool_exit(pool); kfree(pool); } EXPORT_SYMBOL(mempool_destroy); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { spin_lock_init(&pool->lock); pool->min_nr = min_nr; pool->pool_data = pool_data; pool->alloc = alloc_fn; pool->free = free_fn; init_waitqueue_head(&pool->wait); pool->elements = kmalloc_array_node(min_nr, sizeof(void *), gfp_mask, node_id); if (!pool->elements) return -ENOMEM; /* * First pre-allocate the guaranteed number of buffers. */ while (pool->curr_nr < pool->min_nr) { void *element; element = pool->alloc(gfp_mask, pool->pool_data); if (unlikely(!element)) { mempool_exit(pool); return -ENOMEM; } add_element(pool, element); } return 0; } EXPORT_SYMBOL(mempool_init_node); /** * mempool_init - initialize a memory pool * @pool: pointer to the memory pool that should be initialized * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * * Like mempool_create(), but initializes the pool in (i.e. embedded in another * structure). * * Return: %0 on success, negative error code otherwise. */ int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { return mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, GFP_KERNEL, NUMA_NO_NODE); } EXPORT_SYMBOL(mempool_init); /** * mempool_create - create a memory pool * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * * this function creates and allocates a guaranteed size, preallocated * memory pool. The pool can be used from the mempool_alloc() and mempool_free() * functions. This function might sleep. Both the alloc_fn() and the free_fn() * functions might sleep - as long as the mempool_alloc() function is not called * from IRQ contexts. * * Return: pointer to the created memory pool object or %NULL on error. */ mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data, GFP_KERNEL, NUMA_NO_NODE); } EXPORT_SYMBOL(mempool_create); mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { mempool_t *pool; pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); if (!pool) return NULL; if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, gfp_mask, node_id)) { kfree(pool); return NULL; } return pool; } EXPORT_SYMBOL(mempool_create_node); /** * mempool_resize - resize an existing memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @new_min_nr: the new minimum number of elements guaranteed to be * allocated for this pool. * * This function shrinks/grows the pool. In the case of growing, * it cannot be guaranteed that the pool will be grown to the new * size immediately, but new mempool_free() calls will refill it. * This function may sleep. 
* * Note, the caller must guarantee that no mempool_destroy is called * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. * * Return: %0 on success, negative error code otherwise. */ int mempool_resize(mempool_t *pool, int new_min_nr) { void *element; void **new_elements; unsigned long flags; BUG_ON(new_min_nr <= 0); might_sleep(); spin_lock_irqsave(&pool->lock, flags); if (new_min_nr <= pool->min_nr) { while (new_min_nr < pool->curr_nr) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); spin_lock_irqsave(&pool->lock, flags); } pool->min_nr = new_min_nr; goto out_unlock; } spin_unlock_irqrestore(&pool->lock, flags); /* Grow the pool */ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), GFP_KERNEL); if (!new_elements) return -ENOMEM; spin_lock_irqsave(&pool->lock, flags); if (unlikely(new_min_nr <= pool->min_nr)) { /* Raced, other resize will do our work */ spin_unlock_irqrestore(&pool->lock, flags); kfree(new_elements); goto out; } memcpy(new_elements, pool->elements, pool->curr_nr * sizeof(*new_elements)); kfree(pool->elements); pool->elements = new_elements; pool->min_nr = new_min_nr; while (pool->curr_nr < pool->min_nr) { spin_unlock_irqrestore(&pool->lock, flags); element = pool->alloc(GFP_KERNEL, pool->pool_data); if (!element) goto out; spin_lock_irqsave(&pool->lock, flags); if (pool->curr_nr < pool->min_nr) { add_element(pool, element); } else { spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); /* Raced */ goto out; } } out_unlock: spin_unlock_irqrestore(&pool->lock, flags); out: return 0; } EXPORT_SYMBOL(mempool_resize); /** * mempool_alloc - allocate an element from a specific memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @gfp_mask: the usual allocation bitmask. * * this function only sleeps if the alloc_fn() function sleeps or * returns NULL. Note that due to preallocation, this function * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) * Note: using __GFP_ZERO is not supported. * * Return: pointer to the allocated element or %NULL on error. */ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; wait_queue_entry_t wait; gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); might_alloc(gfp_mask); gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ gfp_mask |= __GFP_NOWARN; /* failures are OK */ gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); repeat_alloc: element = pool->alloc(gfp_temp, pool->pool_data); if (likely(element != NULL)) return element; spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr)) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); /* paired with rmb in mempool_free(), read comment there */ smp_wmb(); /* * Update the allocation stack trace as this is more useful * for debugging. */ kmemleak_update_trace(element); return element; } /* * We use gfp mask w/o direct reclaim or IO for the first round. If * alloc failed with that and @pool was empty, retry immediately. 
*/ if (gfp_temp != gfp_mask) { spin_unlock_irqrestore(&pool->lock, flags); gfp_temp = gfp_mask; goto repeat_alloc; } /* We must not sleep if !__GFP_DIRECT_RECLAIM */ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { spin_unlock_irqrestore(&pool->lock, flags); return NULL; } /* Let's wait for someone else to return an element to @pool */ init_wait(&wait); prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(&pool->lock, flags); /* * FIXME: this should be io_schedule(). The timeout is there as a * workaround for some DM problems in 2.6.18. */ io_schedule_timeout(5*HZ); finish_wait(&pool->wait, &wait); goto repeat_alloc; } EXPORT_SYMBOL(mempool_alloc); /** * mempool_free - return an element to the pool. * @element: pool element pointer. * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * this function only sleeps if the free_fn() function sleeps. */ void mempool_free(void *element, mempool_t *pool) { unsigned long flags; if (unlikely(element == NULL)) return; /* * Paired with the wmb in mempool_alloc(). The preceding read is * for @element and the following @pool->curr_nr. This ensures * that the visible value of @pool->curr_nr is from after the * allocation of @element. This is necessary for fringe cases * where @element was passed to this task without going through * barriers. * * For example, assume @p is %NULL at the beginning and one task * performs "p = mempool_alloc(...);" while another task is doing * "while (!p) cpu_relax(); mempool_free(p, ...);". This function * may end up using curr_nr value which is from before allocation * of @p without the following rmb. */ smp_rmb(); /* * For correctness, we need a test which is guaranteed to trigger * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr * without locking achieves that and refilling as soon as possible * is desirable. * * Because curr_nr visible here is always a value after the * allocation of @element, any task which decremented curr_nr below * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets * incremented to min_nr afterwards. If curr_nr gets incremented * to min_nr after the allocation of @element, the elements * allocated after that are subject to the same guarantee. * * Waiters happen iff curr_nr is 0 and the above guarantee also * ensures that there will be frees which return elements to the * pool waking up the waiters. */ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); spin_unlock_irqrestore(&pool->lock, flags); wake_up(&pool->wait); return; } spin_unlock_irqrestore(&pool->lock, flags); } pool->free(element, pool->pool_data); } EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. 
*/ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; VM_BUG_ON(mem->ctor); return kmem_cache_alloc(mem, gfp_mask); } EXPORT_SYMBOL(mempool_alloc_slab); void mempool_free_slab(void *element, void *pool_data) { struct kmem_cache *mem = pool_data; kmem_cache_free(mem, element); } EXPORT_SYMBOL(mempool_free_slab); /* * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory * specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { size_t size = (size_t)pool_data; return kmalloc(size, gfp_mask); } EXPORT_SYMBOL(mempool_kmalloc); void mempool_kfree(void *element, void *pool_data) { kfree(element); } EXPORT_SYMBOL(mempool_kfree); /* * A simple mempool-backed page allocator that allocates pages * of the order specified by pool_data. */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) { int order = (int)(long)pool_data; return alloc_pages(gfp_mask, order); } EXPORT_SYMBOL(mempool_alloc_pages); void mempool_free_pages(void *element, void *pool_data) { int order = (int)(long)pool_data; __free_pages(element, order); } EXPORT_SYMBOL(mempool_free_pages); |
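A hedged usage sketch of the pool API above, pairing mempool_create() with the slab-backed helpers defined in this file; the example_* names are hypothetical:

#include <linux/mempool.h>
#include <linux/slab.h>

struct example_io {
        int sector;
        void *buf;
};

static struct kmem_cache *example_cache;
static mempool_t *example_pool;

static int example_pool_setup(void)
{
        example_cache = KMEM_CACHE(example_io, 0);
        if (!example_cache)
                return -ENOMEM;

        /* Keep at least 16 elements in reserve so forward progress is
         * guaranteed even under memory pressure. */
        example_pool = mempool_create(16, mempool_alloc_slab,
                                      mempool_free_slab, example_cache);
        if (!example_pool) {
                kmem_cache_destroy(example_cache);
                return -ENOMEM;
        }
        return 0;
}

static void example_pool_use(void)
{
        /* Never fails in process context: falls back to the preallocated
         * reserve and, if that is empty, sleeps until mempool_free()
         * returns an element. */
        struct example_io *io = mempool_alloc(example_pool, GFP_NOIO);

        io->sector = 0;
        mempool_free(io, example_pool);
}

static void example_pool_teardown(void)
{
        mempool_destroy(example_pool);
        kmem_cache_destroy(example_cache);
}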
// SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> * (C) 2012 by Vyatta Inc.
<http://www.vyatta.com> */ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/rculist.h> #include <linux/rculist_nulls.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/security.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/netfilter.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static unsigned int nfct_timeout_id __read_mostly; struct ctnl_timeout { struct list_head head; struct list_head free_head; struct rcu_head rcu_head; refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; /* must be at the end */ struct nf_ct_timeout timeout; }; struct nfct_timeout_pernet { struct list_head nfct_timeout_list; struct list_head nfct_timeout_freelist; }; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, }; static struct nfct_timeout_pernet *nfct_timeout_pernet(struct net *net) { return net_generic(net, nfct_timeout_id); } static int ctnl_timeout_parse_policy(void *timeout, const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { struct nlattr **tb; int ret = 0; tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; ret = nla_parse_nested_deprecated(tb, l4proto->ctnl_timeout.nlattr_max, attr, l4proto->ctnl_timeout.nla_policy, NULL); if (ret < 0) goto err; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout); err: kfree(tb); return ret; } static int cttimeout_new_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); __u16 l3num; __u8 l4num; const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; char *name; int ret; if (!cda[CTA_TIMEOUT_NAME] || !cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); list_for_each_entry(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; if (info->nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; matching = timeout; break; } if (matching) { if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { /* You cannot replace one timeout policy by another of * different kind, sorry. 
*/ if (matching->timeout.l3num != l3num || matching->timeout.l4proto->l4proto != l4num) return -EINVAL; return ctnl_timeout_parse_policy(&matching->timeout.data, matching->timeout.l4proto, info->net, cda[CTA_TIMEOUT_DATA]); } return -EBUSY; } l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supportted, skip. */ if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; goto err_proto_put; } timeout = kzalloc(sizeof(struct ctnl_timeout) + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); if (timeout == NULL) { ret = -ENOMEM; goto err_proto_put; } ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, info->net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); __module_get(THIS_MODULE); list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; err: kfree(timeout); err_proto_put: return ret; } static int ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; unsigned int flags = portid ? NLM_F_MULTI : 0; const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->timeout.l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data); if (ret < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nfct_timeout_pernet *pernet; struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) return 0; last = (struct ctnl_timeout *)cb->args[1]; if (cb->args[1]) cb->args[1] = 0; rcu_read_lock(); pernet = nfct_timeout_pernet(net); list_for_each_entry_rcu(cur, &pernet->nfct_timeout_list, head) { if (last) { if (cur != last) continue; last = NULL; } if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { cb->args[1] = (unsigned long)cur; break; } } if (!cb->args[1]) cb->args[2] = 1; rcu_read_unlock(); return skb->len; } static int cttimeout_get_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnl_timeout_dump, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } if (!cda[CTA_TIMEOUT_NAME]) return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { ret = -ENOMEM; 
break; } ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); if (ret <= 0) { kfree_skb(skb2); break; } ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); break; } return ret; } /* try to delete object, fail if it is still in use. */ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; /* We want to avoid races with ctnl_timeout_put. So only when the * current refcnt is 1, we decrease it to 0. */ if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_untimeout(net, &timeout->timeout); kfree_rcu(timeout, rcu_head); } else { ret = -EBUSY; } return ret; } static int cttimeout_del_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; char *name; if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) ctnl_timeout_try_del(info->net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; ret = ctnl_timeout_try_del(info->net, cur); if (ret < 0) return ret; break; } return ret; } static int cttimeout_default_set(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { const struct nf_conntrack_l4proto *l4proto; __u8 l4num; int ret; if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supported, skip. */ if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; goto err; } ret = ctnl_timeout_parse_policy(NULL, l4proto, info->net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; return 0; err: return ret; } static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, u16 l3num, const struct nf_conntrack_l4proto *l4proto, const unsigned int *timeouts) { struct nlmsghdr *nlh; unsigned int flags = portid ? 
NLM_F_MULTI : 0; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); if (ret < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int cttimeout_default_get(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { const struct nf_conntrack_l4proto *l4proto; unsigned int *timeouts = NULL; struct sk_buff *skb2; __u16 l3num; __u8 l4num; int ret; if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) return -EINVAL; l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find(l4num); if (l4proto->l4proto != l4num) return -EOPNOTSUPP; switch (l4proto->l4proto) { case IPPROTO_ICMP: timeouts = &nf_icmp_pernet(info->net)->timeout; break; case IPPROTO_TCP: timeouts = nf_tcp_pernet(info->net)->timeouts; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; case IPPROTO_DCCP: #ifdef CONFIG_NF_CT_PROTO_DCCP timeouts = nf_dccp_pernet(info->net)->dccp_timeout; #endif break; case IPPROTO_ICMPV6: timeouts = &nf_icmpv6_pernet(info->net)->timeout; break; case IPPROTO_SCTP: #ifdef CONFIG_NF_CT_PROTO_SCTP timeouts = nf_sctp_pernet(info->net)->timeouts; #endif break; case IPPROTO_GRE: #ifdef CONFIG_NF_CT_PROTO_GRE timeouts = nf_gre_pernet(info->net)->timeouts; #endif break; case 255: timeouts = &nf_generic_pernet(info->net)->timeout; break; default: WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto); break; } if (!timeouts) return -EOPNOTSUPP; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; ret = cttimeout_default_fill_info(info->net, skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_DEFAULT_SET, l3num, l4proto, timeouts); if (ret <= 0) { kfree_skb(skb2); return -ENOMEM; } return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, const char *name) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *timeout, *matching = NULL; list_for_each_entry_rcu(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; if (!refcount_inc_not_zero(&timeout->refcnt)) goto err; matching = timeout; break; } err: return matching ? 
&matching->timeout : NULL; } static void ctnl_timeout_put(struct nf_ct_timeout *t) { struct ctnl_timeout *timeout = container_of(t, struct ctnl_timeout, timeout); if (refcount_dec_and_test(&timeout->refcnt)) { kfree_rcu(timeout, rcu_head); module_put(THIS_MODULE); } } static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = { .call = cttimeout_default_set, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = { .call = cttimeout_default_get, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { .name = "conntrack_timeout", .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, .cb_count = IPCTNL_MSG_TIMEOUT_MAX, .cb = cttimeout_cb, }; MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); static int __net_init cttimeout_net_init(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); INIT_LIST_HEAD(&pernet->nfct_timeout_list); INIT_LIST_HEAD(&pernet->nfct_timeout_freelist); return 0; } static void __net_exit cttimeout_net_pre_exit(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) { list_del_rcu(&cur->head); list_add(&cur->free_head, &pernet->nfct_timeout_freelist); } /* core calls synchronize_rcu() after this */ } static void __net_exit cttimeout_net_exit(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; if (list_empty(&pernet->nfct_timeout_freelist)) return; nf_ct_untimeout(net, NULL); list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) { list_del(&cur->free_head); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .pre_exit = cttimeout_net_pre_exit, .exit = cttimeout_net_exit, .id = &nfct_timeout_id, .size = sizeof(struct nfct_timeout_pernet), }; static const struct nf_ct_timeout_hooks hooks = { .timeout_find_get = ctnl_timeout_find_get, .timeout_put = ctnl_timeout_put, }; static int __init cttimeout_init(void) { int ret; ret = register_pernet_subsys(&cttimeout_ops); if (ret < 0) return ret; ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " "nfnetlink.\n"); goto err_out; } RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks); return 0; err_out: unregister_pernet_subsys(&cttimeout_ops); return ret; } static int untimeout(struct nf_conn *ct, void *timeout) { struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) RCU_INIT_POINTER(timeout_ext->timeout, NULL); return 0; } static void __exit cttimeout_exit(void) { nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); RCU_INIT_POINTER(nf_ct_timeout_hook, NULL); nf_ct_iterate_destroy(untimeout, NULL); } module_init(cttimeout_init); 
module_exit(cttimeout_exit);
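The timeout policies created above are reached by the rest of conntrack only through the nf_ct_timeout_hooks pointer that cttimeout_init() installs; ctnl_timeout_find_get() and ctnl_timeout_put() themselves stay static. The sketch below is a hypothetical illustration of such a caller, assuming the same includes as the module above; example_lookup_policy() and example_release_policy() are made-up names, while nf_ct_timeout_hook, struct nf_ct_timeout_hooks and its two callbacks come from the code shown here.

/* Hypothetical consumer sketch, not part of the module above. */
static struct nf_ct_timeout *example_lookup_policy(struct net *net,
						   const char *name)
{
	const struct nf_ct_timeout_hooks *h;
	struct nf_ct_timeout *t = NULL;

	rcu_read_lock();
	h = rcu_dereference(nf_ct_timeout_hook);
	if (h)
		t = h->timeout_find_get(net, name);	/* takes a reference */
	rcu_read_unlock();
	return t;
}

static void example_release_policy(struct nf_ct_timeout *t)
{
	const struct nf_ct_timeout_hooks *h;

	if (!t)
		return;
	rcu_read_lock();
	h = rcu_dereference(nf_ct_timeout_hook);
	if (h)
		h->timeout_put(t);	/* drops the reference from find_get() */
	rcu_read_unlock();
}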
// SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match running CPU */ /* * Might be used to distribute connections on several daemons, if * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable, * each RX queue IRQ affined to one CPU (1:1 mapping) */ /* (C) 2010 Eric Dumazet */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/xt_cpu.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); MODULE_DESCRIPTION("Xtables: CPU match"); MODULE_ALIAS("ipt_cpu"); MODULE_ALIAS("ip6t_cpu"); static int cpu_mt_check(const struct xt_mtchk_param *par) { const struct xt_cpu_info *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; return 0; } static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cpu_info *info = par->matchinfo; return (info->cpu == smp_processor_id()) ^ info->invert; } static struct xt_match cpu_mt_reg __read_mostly = { .name = "cpu", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = cpu_mt_check, .match = cpu_mt, .matchsize = sizeof(struct xt_cpu_info), .me = THIS_MODULE, }; static int __init cpu_mt_init(void) { return xt_register_match(&cpu_mt_reg); } static void __exit cpu_mt_exit(void) { xt_unregister_match(&cpu_mt_reg); } module_init(cpu_mt_init); module_exit(cpu_mt_exit);
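A usage note, not taken from the file above: the match only compares the CPU currently executing the netfilter hook against the configured value, so it is normally paired with RPS or per-queue IRQ affinity. The comment block below shows the customary illustration of the iptables "cpu" match; the ports are placeholders.

/*
 * Illustrative only: fan incoming HTTP connections out to per-CPU daemons.
 * With each RX queue's IRQ affined to one CPU, cpu_mt() evaluates
 * (info->cpu == smp_processor_id()) ^ info->invert, so "--cpu 0" matches
 * only packets whose rule traversal runs on CPU 0.
 *
 *   iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \
 *            -j REDIRECT --to-port 8080
 *   iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \
 *            -j REDIRECT --to-port 8081
 */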
/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/ns_common.h> #include <linux/fs_pin.h> struct mnt_namespace { struct ns_common ns; struct mount * root; /* * Traversal and modification of .list is protected by either * - taking namespace_sem for write, OR * - taking namespace_sem for read AND taking .ns_lock. */ struct list_head list; spinlock_t ns_lock; struct user_namespace *user_ns; struct ucounts *ucounts; u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; u64 event; unsigned int mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; } __randomize_layout; struct mnt_pcp { int mnt_count; int mnt_writers; }; struct mountpoint { struct hlist_node m_hash; struct dentry *m_dentry; struct hlist_head m_list; int m_count; }; struct mount { struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; union { struct rcu_head mnt_rcu; struct llist_node mnt_llist; }; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else int mnt_count; int mnt_writers; #endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ struct list_head mnt_instance; /* mount instance on sb->s_mounts */ const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_share; /* circular list of shared mounts */ struct list_head mnt_slave_list;/* list of slave mounts */ struct list_head mnt_slave; /* slave list entry */ struct mount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ union { struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ struct hlist_node mnt_umount; }; struct list_head mnt_umounting; /* list entry for umount propagation */ #ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; struct hlist_head mnt_stuck_children; } __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ static inline struct mount *real_mount(struct vfsmount *mnt) { return container_of(mnt, struct mount, mnt); } static inline int mnt_has_parent(struct mount *mnt) { return mnt != mnt->mnt_parent; } static inline int is_mounted(struct vfsmount *mnt) { /* neither detached nor internal?
*/ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); static inline bool __path_is_mountpoint(const struct path *path) { struct mount *m = __lookup_mnt(path->mnt, path->dentry); return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT)); } extern void __detach_mounts(struct dentry *dentry); static inline void detach_mounts(struct dentry *dentry) { if (!d_mountpoint(dentry)) return; __detach_mounts(dentry); } static inline void get_mnt_ns(struct mnt_namespace *ns) { refcount_inc(&ns->ns.count); } extern seqlock_t mount_lock; struct proc_mounts { struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); struct mount cursor; }; extern const struct seq_operations mounts_op; extern bool __is_local_mountpoint(struct dentry *dentry); static inline bool is_local_mountpoint(struct dentry *dentry) { if (!d_mountpoint(dentry)) return false; return __is_local_mountpoint(dentry); } static inline bool is_anon_ns(struct mnt_namespace *ns) { return ns->seq == 0; } extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
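A hypothetical illustration (not part of this header) of how its helpers compose: real_mount() recovers the embedding struct mount from a vfsmount, and mnt_has_parent() terminates the walk because a root mount is its own parent. example_topmost_mount() is a made-up name, and the locking real callers need (namespace_sem or mount_lock, as described in the comments above) is omitted.

static inline struct mount *example_topmost_mount(struct vfsmount *v)
{
	struct mount *m = real_mount(v);	/* container_of() back to struct mount */

	while (mnt_has_parent(m))		/* a root mount is its own parent */
		m = m->mnt_parent;
	return m;
}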
/* SPDX-License-Identifier: GPL-2.0 */ /* * Connection state tracking for netfilter. This is separated from, * but required by, the (future) NAT layer; it can also be used by an iptables * extension. * * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> * - generalize L3 protocol dependent part. * * Derived from include/linux/netfilter_ipv4/ip_conntrack.h */ #ifndef _NF_CONNTRACK_H #define _NF_CONNTRACK_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_dccp.h> #include <linux/netfilter/nf_conntrack_sctp.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <net/netfilter/nf_conntrack_tuple.h> struct nf_ct_udp { unsigned long stream_ts; }; /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_udp udp; struct nf_ct_gre gre; unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; struct nf_conntrack_net_ecache { struct delayed_work dwork; spinlock_t dying_lock; struct hlist_nulls_head dying_list; }; struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; unsigned int expect_count; /* only used from work queues, configuration plane, and so on: */ unsigned int users4; unsigned int users6; unsigned int users_bridge; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_net_ecache ecache; #endif }; #include <linux/types.h> #include <linux/skbuff.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put().
* Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * except that the latter uses internal indirection and does not * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; spinlock_t lock; /* jiffies32 when this ct is considered dead */ u32 timeout; #ifdef CONFIG_NF_CONNTRACK_ZONES struct nf_conntrack_zone zone; #endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) struct hlist_node nat_bysource; #endif /* all members below initialized via memset */ struct { } __nfct_init_offset; /* If we were expected by an expectation, this will be it */ struct nf_conn *master; #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK u_int32_t secmark; #endif /* Extensions */ struct nf_ct_ext *ext; /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; static inline struct nf_conn * nf_ct_to_nf_conn(const struct nf_conntrack *nfct) { return container_of(nfct, struct nf_conn, ct_general); } static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { return container_of(hash, struct nf_conn, tuplehash[hash->tuple.dst.dir]); } static inline u_int16_t nf_ct_l3num(const struct nf_conn *ct) { return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; } static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) { return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; } #define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) extern struct net init_net; static inline struct net *nf_ct_net(const struct nf_conn *ct) { return read_pnet(&ct->ct_net); } /* Is this tuple taken? (ignoring any belonging to the given conntrack). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); /* Return conntrack_info and tuple hash for given skb. 
*/ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { unsigned long nfct = skb_get_nfct(skb); *ctinfo = nfct & NFCT_INFOMASK; return (struct nf_conn *)(nfct & NFCT_PTRMASK); } void nf_ct_destroy(struct nf_conntrack *nfct); void nf_conntrack_tcp_set_closing(struct nf_conn *ct); /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { if (ct && refcount_dec_and_test(&ct->ct_general.use)) nf_ct_destroy(&ct->ct_general); } /* load module; enable/disable conntrack in this namespace */ int nf_ct_netns_get(struct net *net, u8 nfproto); void nf_ct_netns_put(struct net *net, u8 nfproto); /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) */ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); int nf_conntrack_hash_check_insert(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, u32 extra_jiffies, bool do_acct); /* Refresh conntrack for this many jiffies and do accounting */ static inline void nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, u32 extra_jiffies) { __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); } /* Refresh conntrack for this many jiffies */ static inline void nf_ct_refresh(struct nf_conn *ct, const struct sk_buff *skb, u32 extra_jiffies) { __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); } /* kill conntrack and do accounting */ bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { return nf_ct_delete(ct, 0, 0); } struct nf_ct_iter_data { struct net *net; void *data; u32 portid; int report; }; /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), const struct nf_ct_iter_data *iter_data); /* also set unconfirmed conntracks as dying. Only use in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data); struct nf_conntrack_zone; void nf_conntrack_free(struct nf_conn *ct); struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); static inline int nf_ct_is_template(const struct nf_conn *ct) { return test_bit(IPS_TEMPLATE_BIT, &ct->status); } /* It's confirmed if it is, or has been in the hash table. 
*/ static inline int nf_ct_is_confirmed(const struct nf_conn *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } static inline int nf_ct_is_dying(const struct nf_conn *ct) { return test_bit(IPS_DYING_BIT, &ct->status); } /* Packet is received from loopback */ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) { return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } static inline void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply) { /* Must be unconfirmed, so not in hash table yet */ if (WARN_ON(nf_ct_is_confirmed(ct))) return; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; } #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) /* Set an arbitrary timeout large enough not to ever expire, this saves * us a check for the IPS_OFFLOAD_BIT from the packet path via * nf_ct_is_expired(). */ static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; extern seqcount_spinlock_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; /* must be called with rcu read lock held */ static inline void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) { struct hlist_nulls_head *hptr; unsigned int sequence, hsz; do { sequence = read_seqcount_begin(&nf_conntrack_generation); hsz = nf_conntrack_htable_size; hptr = nf_conntrack_hash; } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); *hash = hptr; *hsize = hsz; } struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); u32 nf_ct_get_id(const struct nf_conn *ct); u32 nf_conntrack_count(const struct net *net); static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { skb_set_nfct(skb, (unsigned long)ct | info); } extern unsigned int nf_conntrack_net_id; static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net) { return net_generic(net, nf_conntrack_net_id); } int nf_ct_skb_network_trim(struct sk_buff *skb, int family); int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, u16 zone, u8 family, u8 *proto, u16 *mru); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) #endif /* _NF_CONNTRACK_H */
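A hypothetical illustration (not from this header) of the nf_ct_get() pattern: the helper returns the conntrack attached to the skb without taking a reference, as the struct nf_conn comment warns, so a read-only check needs no matching nf_ct_put() while the skb keeps the entry alive. example_skb_ct_is_established() is a made-up name; IP_CT_ESTABLISHED comes from the nf_conntrack_common.h include above and covers the original direction only.

static inline bool example_skb_ct_is_established(const struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);	/* no refcount taken */

	return ct && nf_ct_is_confirmed(ct) && ctinfo == IP_CT_ESTABLISHED;
}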
7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 
7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */

#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/sock.h>
#include <net/xdp.h>

#include "../tools/lib/bpf/relo_core.h"

/* BTF (BPF Type Format) is the metadata format which describes
 * the data types of BPF programs/maps.  Hence, it basically focuses
 * on the C programming language, which modern BPF is primarily
 * using.
 *
 * ELF Section:
 * ~~~~~~~~~~~
 * The BTF data is stored under the ".BTF" ELF section.
 *
 * struct btf_type:
 * ~~~~~~~~~~~~~~~
 * Each 'struct btf_type' object describes a C data type.
 * Depending on the type it is describing, a 'struct btf_type'
 * object may be followed by more data.  E.g., to describe an
 * array, 'struct btf_type' is followed by 'struct btf_array'.
 *
 * 'struct btf_type' and any extra data following it are
 * 4-byte aligned.
 *
 * Type section:
 * ~~~~~~~~~~~~~
 * The BTF type section contains a list of 'struct btf_type' objects.
 * Each one describes a C type.  Recall from the above section
 * that a 'struct btf_type' object could be immediately followed by extra
 * data in order to describe some particular C types.
 *
 * type_id:
 * ~~~~~~~
 * Each btf_type object is identified by a type_id.  The type_id
 * is implied by the location of the btf_type object in
 * the BTF type section.  The first one has type_id 1, the second
 * one has type_id 2, etc.  Hence, an earlier btf_type has
 * a smaller type_id.
 *
 * A btf_type object may refer to another btf_type object by using
 * type_id (i.e. the "type" in the "struct btf_type").
 *
 * NOTE that we cannot assume any reference order.
 * A btf_type object can refer to an earlier btf_type object
 * but it can also refer to a later btf_type object.
 *
 * For example, to describe "const void *", a btf_type
 * object describing "const" may refer to another btf_type
 * object describing "void *".  This type reference is done
 * by specifying type_id:
 *
 * [1] CONST (anon) type_id=2
 * [2] PTR (anon) type_id=0
 *
 * The above is the btf_verifier debug log:
 *   - Each line starting with "[?]" is a btf_type object
 *   - [?] is the type_id of the btf_type object.
 *   - CONST/PTR is the BTF_KIND_XXX
 *   - "(anon)" is the name of the type.  It just
 *     happens that CONST and PTR have no name.
 *   - type_id=XXX is the 'u32 type' in btf_type
 *
 * NOTE: "void" has type_id 0
 *
 * String section:
 * ~~~~~~~~~~~~~~
 * The BTF string section contains the names used by the type section.
 * Each string is referred to by an "offset" from the beginning of the
 * string section.
 *
 * Each string is '\0' terminated.
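 *
 * For example (offsets are illustrative only), a string section laid
 * out as
 *
 *   0: '\0'   1: "int\0"   5: "task_struct\0"
 *
 * lets a btf_type name itself "task_struct" simply by storing
 * name_off = 5.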
 *
 * The first character in the string section must be '\0'
 * which is used to mean 'anonymous'.  Some btf_type may not
 * have a name.
 */

/* BTF verification:
 *
 * To verify BTF data, two passes are needed.
 *
 * Pass #1
 * ~~~~~~~
 * The first pass is to collect all btf_type objects into
 * an array: "btf->types".
 *
 * Depending on the C type that a btf_type is describing,
 * a btf_type may be followed by extra data.  We don't know
 * how many btf_types there are, and more importantly we don't
 * know where each btf_type is located in the type section.
 *
 * Without knowing the location of each type_id, most verifications
 * cannot be done, e.g. an earlier btf_type may refer to a later
 * btf_type (recall the "const void *" above), so we cannot
 * check this type reference in the first pass.
 *
 * The first pass still does some verifications (e.g.
 * checking that the name is a valid offset into the string section).
 *
 * Pass #2
 * ~~~~~~~
 * The main focus is to resolve a btf_type that is referring
 * to another type.
 *
 * We have to ensure the referring type:
 * 1) does exist in the BTF (i.e. in btf->types[])
 * 2) does not cause a loop:
 *	struct A {
 *		struct B b;
 *	};
 *
 *	struct B {
 *		struct A a;
 *	};
 *
 * btf_type_needs_resolve() decides if a btf_type needs
 * to be resolved.
 *
 * The needs_resolve type implements the "resolve()" ops which
 * essentially does a DFS and detects back edges.
 *
 * During resolve (or DFS), different C types have different
 * "RESOLVED" conditions.
 *
 * When resolving a BTF_KIND_STRUCT, we need to resolve all its
 * members because a member is always referring to another
 * type.  A struct's member can be treated as "RESOLVED" if
 * it is referring to a BTF_KIND_PTR.  Otherwise, the
 * following valid C struct would be rejected:
 *
 *	struct A {
 *		int m;
 *		struct A *a;
 *	};
 *
 * When resolving a BTF_KIND_PTR, it needs to keep resolving if
 * it is referring to another BTF_KIND_PTR.  Otherwise, we cannot
 * detect a pointer loop, e.g.:
 *
 *   BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
 *                        ^                                           |
 *                        +-------------------------------------------+
 *
 */

#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
#define BITS_ROUNDUP_BYTES(bits) \
	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))

#define BTF_INFO_MASK 0x9f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)

/* 16MB for 64k structs, each with 16 members, and
 * a few MB of space for the string section.
 * The hard limit is S32_MAX.
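 * (Rough arithmetic behind that number: 64k structs * 16 members *
 *  sizeof(struct btf_member) (12 bytes) is about 12MB of member data,
 *  plus roughly 0.75MB of 'struct btf_type' headers, which leaves a
 *  few MB for the strings.)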
*/ #define BTF_MAX_SIZE (16 * 1024 * 1024) #define for_each_member_from(i, from, struct_type, member) \ for (i = from, member = btf_type_member(struct_type) + from; \ i < btf_type_vlen(struct_type); \ i++, member++) #define for_each_vsi_from(i, from, struct_type, member) \ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ i < btf_type_vlen(struct_type); \ i++, member++) DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); enum btf_kfunc_hook { BTF_KFUNC_HOOK_COMMON, BTF_KFUNC_HOOK_XDP, BTF_KFUNC_HOOK_TC, BTF_KFUNC_HOOK_STRUCT_OPS, BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, BTF_KFUNC_HOOK_CGROUP_SKB, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, BTF_KFUNC_HOOK_LWT, BTF_KFUNC_HOOK_NETFILTER, BTF_KFUNC_HOOK_MAX, }; enum { BTF_KFUNC_SET_MAX_CNT = 256, BTF_DTOR_KFUNC_MAX_CNT = 256, BTF_KFUNC_FILTER_MAX_CNT = 16, }; struct btf_kfunc_hook_filter { btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT]; u32 nr_filters; }; struct btf_kfunc_set_tab { struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX]; }; struct btf_id_dtor_kfunc_tab { u32 cnt; struct btf_id_dtor_kfunc dtors[]; }; struct btf { void *data; struct btf_type **types; u32 *resolved_ids; u32 *resolved_sizes; const char *strings; void *nohdr_data; struct btf_header hdr; u32 nr_types; /* includes VOID for base BTF */ u32 types_size; u32 data_size; refcount_t refcnt; u32 id; struct rcu_head rcu; struct btf_kfunc_set_tab *kfunc_set_tab; struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; struct btf_struct_metas *struct_meta_tab; /* split BTF support */ struct btf *base_btf; u32 start_id; /* first type ID in this BTF (0 for base BTF) */ u32 start_str_off; /* first string offset (0 for base BTF) */ char name[MODULE_NAME_LEN]; bool kernel_btf; }; enum verifier_phase { CHECK_META, CHECK_TYPE, }; struct resolve_vertex { const struct btf_type *t; u32 type_id; u16 next_member; }; enum visit_state { NOT_VISITED, VISITED, RESOLVED, }; enum resolve_mode { RESOLVE_TBD, /* To Be Determined */ RESOLVE_PTR, /* Resolving for Pointer */ RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union * or array */ }; #define MAX_RESOLVE_DEPTH 32 struct btf_sec_info { u32 off; u32 len; }; struct btf_verifier_env { struct btf *btf; u8 *visit_states; struct resolve_vertex stack[MAX_RESOLVE_DEPTH]; struct bpf_verifier_log log; u32 log_type_id; u32 top_stack; enum verifier_phase phase; enum resolve_mode resolve_mode; }; static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", [BTF_KIND_PTR] = "PTR", [BTF_KIND_ARRAY] = "ARRAY", [BTF_KIND_STRUCT] = "STRUCT", [BTF_KIND_UNION] = "UNION", [BTF_KIND_ENUM] = "ENUM", [BTF_KIND_FWD] = "FWD", [BTF_KIND_TYPEDEF] = "TYPEDEF", [BTF_KIND_VOLATILE] = "VOLATILE", [BTF_KIND_CONST] = "CONST", [BTF_KIND_RESTRICT] = "RESTRICT", [BTF_KIND_FUNC] = "FUNC", [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", [BTF_KIND_ENUM64] = "ENUM64", }; const char *btf_type_str(const struct btf_type *t) { return btf_kind_str[BTF_INFO_KIND(t->info)]; } /* Chunk size we use in safe copy of data to be shown. 
*/ #define BTF_SHOW_OBJ_SAFE_SIZE 32 /* * This is the maximum size of a base type value (equivalent to a * 128-bit int); if we are at the end of our safe buffer and have * less than 16 bytes space we can't be assured of being able * to copy the next type safely, so in such cases we will initiate * a new copy. */ #define BTF_SHOW_OBJ_BASE_TYPE_SIZE 16 /* Type name size */ #define BTF_SHOW_NAME_SIZE 80 /* * The suffix of a type that indicates it cannot alias another type when * comparing BTF IDs for kfunc invocations. */ #define NOCAST_ALIAS_SUFFIX "___init" /* * Common data to all BTF show operations. Private show functions can add * their own data to a structure containing a struct btf_show and consult it * in the show callback. See btf_type_show() below. * * One challenge with showing nested data is we want to skip 0-valued * data, but in order to figure out whether a nested object is all zeros * we need to walk through it. As a result, we need to make two passes * when handling structs, unions and arrays; the first path simply looks * for nonzero data, while the second actually does the display. The first * pass is signalled by show->state.depth_check being set, and if we * encounter a non-zero value we set show->state.depth_to_show to * the depth at which we encountered it. When we have completed the * first pass, we will know if anything needs to be displayed if * depth_to_show > depth. See btf_[struct,array]_show() for the * implementation of this. * * Another problem is we want to ensure the data for display is safe to * access. To support this, the anonymous "struct {} obj" tracks the data * object and our safe copy of it. We copy portions of the data needed * to the object "copy" buffer, but because its size is limited to * BTF_SHOW_OBJ_COPY_LEN bytes, multiple copies may be required as we * traverse larger objects for display. * * The various data type show functions all start with a call to * btf_show_start_type() which returns a pointer to the safe copy * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the * raw data itself). btf_show_obj_safe() is responsible for * using copy_from_kernel_nofault() to update the safe data if necessary * as we traverse the object's data. skbuff-like semantics are * used: * * - obj.head points to the start of the toplevel object for display * - obj.size is the size of the toplevel object * - obj.data points to the current point in the original data at * which our safe data starts. obj.data will advance as we copy * portions of the data. * * In most cases a single copy will suffice, but larger data structures * such as "struct task_struct" will require many copies. The logic in * btf_show_obj_safe() handles the logic that determines if a new * copy_from_kernel_nofault() is needed. 
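 *
 * As a rough illustration (sizes below are examples only): when dumping
 * a 512-byte struct at kernel address A, obj.head is A and obj.size is
 * 512, but only BTF_SHOW_OBJ_SAFE_SIZE bytes around the data being
 * shown are copied at a time.  Members falling inside
 * [obj.data, obj.data + BTF_SHOW_OBJ_SAFE_SIZE) are rendered from
 * obj.safe; the first member outside that window triggers another
 * copy_from_kernel_nofault() and obj.data advances to that member.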
*/ struct btf_show { u64 flags; void *target; /* target of show operation (seq file, buffer) */ void (*showfn)(struct btf_show *show, const char *fmt, va_list args); const struct btf *btf; /* below are used during iteration */ struct { u8 depth; u8 depth_to_show; u8 depth_check; u8 array_member:1, array_terminated:1; u16 array_encoding; u32 type_id; int status; /* non-zero for error */ const struct btf_type *type; const struct btf_member *member; char name[BTF_SHOW_NAME_SIZE]; /* space for member name/type */ } state; struct { u32 size; void *head; void *data; u8 safe[BTF_SHOW_OBJ_SAFE_SIZE]; } obj; }; struct btf_kind_operations { s32 (*check_meta)(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left); int (*resolve)(struct btf_verifier_env *env, const struct resolve_vertex *v); int (*check_member)(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type); int (*check_kflag_member)(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type); void (*log_details)(struct btf_verifier_env *env, const struct btf_type *t); void (*show)(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offsets, struct btf_show *show); }; static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); static int btf_func_check(struct btf_verifier_env *env, const struct btf_type *t); static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them is not strictly a C modifier * but they are grouped into the same bucket * for BTF concern: * A type (t) that refers to another * type through t->type AND its size cannot * be determined without following the t->type. * * ptr does not fall into this bucket * because its size is always sizeof(void *). 
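	 *
	 * For example, given "typedef const int cint_t;", both the
	 * TYPEDEF and the CONST are modifiers in this sense: their
	 * size is only known after following t->type down to the
	 * underlying INT.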
*/ switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_TYPE_TAG: return true; } return false; } bool btf_type_is_void(const struct btf_type *t) { return t == &btf_void; } static bool btf_type_is_fwd(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; } static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } static bool btf_type_is_decl_tag(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } static bool btf_type_nosize(const struct btf_type *t) { return btf_type_is_void(t) || btf_type_is_fwd(t) || btf_type_is_func(t) || btf_type_is_func_proto(t) || btf_type_is_decl_tag(t); } static bool btf_type_nosize_or_null(const struct btf_type *t) { return !t || btf_type_nosize(t); } static bool btf_type_is_decl_tag_target(const struct btf_type *t) { return btf_type_is_func(t) || btf_type_is_struct(t) || btf_type_is_var(t) || btf_type_is_typedef(t); } u32 btf_nr_types(const struct btf *btf) { u32 total = 0; while (btf) { total += btf->nr_types; btf = btf->base_btf; } return total; } s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) { const struct btf_type *t; const char *tname; u32 i, total; total = btf_nr_types(btf); for (i = 1; i < total; i++) { t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) continue; tname = btf_name_by_offset(btf, t->name_off); if (!strcmp(tname, name)) return i; } return -ENOENT; } s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p) { struct btf *btf; s32 ret; int id; btf = bpf_get_btf_vmlinux(); if (IS_ERR(btf)) return PTR_ERR(btf); if (!btf) return -EINVAL; ret = btf_find_by_name_kind(btf, name, kind); /* ret is never zero, since btf_find_by_name_kind returns * positive btf_id or negative error. */ if (ret > 0) { btf_get(btf); *btf_p = btf; return ret; } /* If name is not found in vmlinux's BTF then search in module's BTFs */ spin_lock_bh(&btf_idr_lock); idr_for_each_entry(&btf_idr, btf, id) { if (!btf_is_module(btf)) continue; /* linear search could be slow hence unlock/lock * the IDR to avoiding holding it for too long */ btf_get(btf); spin_unlock_bh(&btf_idr_lock); ret = btf_find_by_name_kind(btf, name, kind); if (ret > 0) { *btf_p = btf; return ret; } btf_put(btf); spin_lock_bh(&btf_idr_lock); } spin_unlock_bh(&btf_idr_lock); return ret; } const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, u32 id, u32 *res_id) { const struct btf_type *t = btf_type_by_id(btf, id); while (btf_type_is_modifier(t)) { id = t->type; t = btf_type_by_id(btf, t->type); } if (res_id) *res_id = id; return t; } const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, u32 id, u32 *res_id) { const struct btf_type *t; t = btf_type_skip_modifiers(btf, id, NULL); if (!btf_type_is_ptr(t)) return NULL; return btf_type_skip_modifiers(btf, t->type, res_id); } const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, u32 id, u32 *res_id) { const struct btf_type *ptype; ptype = btf_type_resolve_ptr(btf, id, res_id); if (ptype && btf_type_is_func_proto(ptype)) return ptype; return NULL; } /* Types that act only as a source, not sink or intermediate * type when resolving. */ static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } /* What types need to be resolved? * * btf_type_is_modifier() is an obvious one. 
* * btf_type_is_struct() because its member refers to * another type (through member->type). * * btf_type_is_var() because the variable refers to * another type. btf_type_is_datasec() holds multiple * btf_type_is_var() types that need resolving. * * btf_type_is_array() because its element (array->type) * refers to another type. Array can be thought of a * special case of struct while array just has the same * member-type repeated by array->nelems of times. */ static bool btf_type_needs_resolve(const struct btf_type *t) { return btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || btf_type_is_func(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } /* t->size can be used */ static bool btf_type_has_size(const struct btf_type *t) { switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_INT: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: case BTF_KIND_ENUM64: return true; } return false; } static const char *btf_int_encoding_str(u8 encoding) { if (encoding == 0) return "(none)"; else if (encoding == BTF_INT_SIGNED) return "SIGNED"; else if (encoding == BTF_INT_CHAR) return "CHAR"; else if (encoding == BTF_INT_BOOL) return "BOOL"; else return "UNKN"; } static u32 btf_type_int(const struct btf_type *t) { return *(u32 *)(t + 1); } static const struct btf_array *btf_type_array(const struct btf_type *t) { return (const struct btf_array *)(t + 1); } static const struct btf_enum *btf_type_enum(const struct btf_type *t) { return (const struct btf_enum *)(t + 1); } static const struct btf_var *btf_type_var(const struct btf_type *t) { return (const struct btf_var *)(t + 1); } static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) { return (const struct btf_decl_tag *)(t + 1); } static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t) { return (const struct btf_enum64 *)(t + 1); } static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; } static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { if (!BTF_STR_OFFSET_VALID(offset)) return false; while (offset < btf->start_str_off) btf = btf->base_btf; offset -= btf->start_str_off; return offset < btf->hdr.str_len; } static bool __btf_name_char_ok(char c, bool first) { if ((first ? 
!isalpha(c) : !isalnum(c)) && c != '_' && c != '.') return false; return true; } static const char *btf_str_by_offset(const struct btf *btf, u32 offset) { while (offset < btf->start_str_off) btf = btf->base_btf; offset -= btf->start_str_off; if (offset < btf->hdr.str_len) return &btf->strings[offset]; return NULL; } static bool __btf_name_valid(const struct btf *btf, u32 offset) { /* offset must be valid */ const char *src = btf_str_by_offset(btf, offset); const char *src_limit; if (!__btf_name_char_ok(*src, true)) return false; /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; src++; while (*src && src < src_limit) { if (!__btf_name_char_ok(*src, false)) return false; src++; } return !*src; } static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { return __btf_name_valid(btf, offset); } static bool btf_name_valid_section(const struct btf *btf, u32 offset) { return __btf_name_valid(btf, offset); } static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { const char *name; if (!offset) return "(anon)"; name = btf_str_by_offset(btf, offset); return name ?: "(invalid-name-offset)"; } const char *btf_name_by_offset(const struct btf *btf, u32 offset) { return btf_str_by_offset(btf, offset); } const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { while (type_id < btf->start_id) btf = btf->base_btf; type_id -= btf->start_id; if (type_id >= btf->nr_types) return NULL; return btf->types[type_id]; } EXPORT_SYMBOL_GPL(btf_type_by_id); /* * Regular int is not a bit field and it must be either * u8/u16/u32/u64 or __int128. */ static bool btf_type_int_is_regular(const struct btf_type *t) { u8 nr_bits, nr_bytes; u32 int_data; int_data = btf_type_int(t); nr_bits = BTF_INT_BITS(int_data); nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); if (BITS_PER_BYTE_MASKED(nr_bits) || BTF_INT_OFFSET(int_data) || (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && nr_bytes != (2 * sizeof(u64)))) { return false; } return true; } /* * Check that given struct member is a regular int with expected * offset and size. */ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size) { const struct btf_type *t; u32 id, int_data; u8 nr_bits; id = m->type; t = btf_type_id_size(btf, &id, NULL); if (!t || !btf_type_is_int(t)) return false; int_data = btf_type_int(t); nr_bits = BTF_INT_BITS(int_data); if (btf_type_kflag(s)) { u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset); u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset); /* if kflag set, int should be a regular int and * bit offset should be at byte boundary. */ return !bitfield_size && BITS_ROUNDUP_BYTES(bit_offset) == expected_offset && BITS_ROUNDUP_BYTES(nr_bits) == expected_size; } if (BTF_INT_OFFSET(int_data) || BITS_PER_BYTE_MASKED(m->offset) || BITS_ROUNDUP_BYTES(m->offset) != expected_offset || BITS_PER_BYTE_MASKED(nr_bits) || BITS_ROUNDUP_BYTES(nr_bits) != expected_size) return false; return true; } /* Similar to btf_type_skip_modifiers() but does not skip typedefs. 
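 * For example (illustrative), for a variable of type "volatile cint_t"
 * where "typedef const int cint_t;", btf_type_skip_modifiers() resolves
 * all the way down to the INT, while this helper stops at the TYPEDEF.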
*/ static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf, u32 id) { const struct btf_type *t = btf_type_by_id(btf, id); while (btf_type_is_modifier(t) && BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { t = btf_type_by_id(btf, t->type); } return t; } #define BTF_SHOW_MAX_ITER 10 #define BTF_KIND_BIT(kind) (1ULL << kind) /* * Populate show->state.name with type name information. * Format of type name is * * [.member_name = ] (type_name) */ static const char *btf_show_name(struct btf_show *show) { /* BTF_MAX_ITER array suffixes "[]" */ const char *array_suffixes = "[][][][][][][][][][]"; const char *array_suffix = &array_suffixes[strlen(array_suffixes)]; /* BTF_MAX_ITER pointer suffixes "*" */ const char *ptr_suffixes = "**********"; const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; const char *name = NULL, *prefix = "", *parens = ""; const struct btf_member *m = show->state.member; const struct btf_type *t; const struct btf_array *array; u32 id = show->state.type_id; const char *member = NULL; bool show_member = false; u64 kinds = 0; int i; show->state.name[0] = '\0'; /* * Don't show type name if we're showing an array member; * in that case we show the array type so don't need to repeat * ourselves for each member. */ if (show->state.array_member) return ""; /* Retrieve member name, if any. */ if (m) { member = btf_name_by_offset(show->btf, m->name_off); show_member = strlen(member) > 0; id = m->type; } /* * Start with type_id, as we have resolved the struct btf_type * * via btf_modifier_show() past the parent typedef to the child * struct, int etc it is defined as. In such cases, the type_id * still represents the starting type while the struct btf_type * * in our show->state points at the resolved type of the typedef. */ t = btf_type_by_id(show->btf, id); if (!t) return ""; /* * The goal here is to build up the right number of pointer and * array suffixes while ensuring the type name for a typedef * is represented. Along the way we accumulate a list of * BTF kinds we have encountered, since these will inform later * display; for example, pointer types will not require an * opening "{" for struct, we will just display the pointer value. * * We also want to accumulate the right number of pointer or array * indices in the format string while iterating until we get to * the typedef/pointee/array member target type. * * We start by pointing at the end of pointer and array suffix * strings; as we accumulate pointers and arrays we move the pointer * or array string backwards so it will show the expected number of * '*' or '[]' for the type. BTF_SHOW_MAX_ITER of nesting of pointers * and/or arrays and typedefs are supported as a precaution. * * We also want to get typedef name while proceeding to resolve * type it points to so that we can add parentheses if it is a * "typedef struct" etc. 
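	 *
	 * As a rough example, a member declared as "struct foo **bar"
	 * ends up with a name along the lines of ".bar = (struct foo **)",
	 * while an array member such as "int baz[2][3]" picks up an
	 * "[][]" suffix instead of the "*" suffixes.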
*/ for (i = 0; i < BTF_SHOW_MAX_ITER; i++) { switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_TYPEDEF: if (!name) name = btf_name_by_offset(show->btf, t->name_off); kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF); id = t->type; break; case BTF_KIND_ARRAY: kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY); parens = "["; if (!t) return ""; array = btf_type_array(t); if (array_suffix > array_suffixes) array_suffix -= 2; id = array->type; break; case BTF_KIND_PTR: kinds |= BTF_KIND_BIT(BTF_KIND_PTR); if (ptr_suffix > ptr_suffixes) ptr_suffix -= 1; id = t->type; break; default: id = 0; break; } if (!id) break; t = btf_type_skip_qualifiers(show->btf, id); } /* We may not be able to represent this type; bail to be safe */ if (i == BTF_SHOW_MAX_ITER) return ""; if (!name) name = btf_name_by_offset(show->btf, t->name_off); switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_STRUCT: case BTF_KIND_UNION: prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ? "struct" : "union"; /* if it's an array of struct/union, parens is already set */ if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY)))) parens = "{"; break; case BTF_KIND_ENUM: case BTF_KIND_ENUM64: prefix = "enum"; break; default: break; } /* pointer does not require parens */ if (kinds & BTF_KIND_BIT(BTF_KIND_PTR)) parens = ""; /* typedef does not require struct/union/enum prefix */ if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF)) prefix = ""; if (!name) name = ""; /* Even if we don't want type name info, we want parentheses etc */ if (show->flags & BTF_SHOW_NONAME) snprintf(show->state.name, sizeof(show->state.name), "%s", parens); else snprintf(show->state.name, sizeof(show->state.name), "%s%s%s(%s%s%s%s%s%s)%s", /* first 3 strings comprise ".member = " */ show_member ? "." : "", show_member ? member : "", show_member ? " = " : "", /* ...next is our prefix (struct, enum, etc) */ prefix, strlen(prefix) > 0 && strlen(name) > 0 ? " " : "", /* ...this is the type name itself */ name, /* ...suffixed by the appropriate '*', '[]' suffixes */ strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix, array_suffix, parens); return show->state.name; } static const char *__btf_show_indent(struct btf_show *show) { const char *indents = " "; const char *indent = &indents[strlen(indents)]; if ((indent - show->state.depth) >= indents) return indent - show->state.depth; return indents; } static const char *btf_show_indent(struct btf_show *show) { return show->flags & BTF_SHOW_COMPACT ? "" : __btf_show_indent(show); } static const char *btf_show_newline(struct btf_show *show) { return show->flags & BTF_SHOW_COMPACT ? "" : "\n"; } static const char *btf_show_delim(struct btf_show *show) { if (show->state.depth == 0) return ""; if ((show->flags & BTF_SHOW_COMPACT) && show->state.type && BTF_INFO_KIND(show->state.type->info) == BTF_KIND_UNION) return "|"; return ","; } __printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...) { va_list args; if (!show->state.depth_check) { va_start(args, fmt); show->showfn(show, fmt, args); va_end(args); } } /* Macros are used here as btf_show_type_value[s]() prepends and appends * format specifiers to the format specifier passed in; these do the work of * adding indentation, delimiters etc while the caller simply has to specify * the type value(s) in the format specifier + value(s). 
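 *
 * For example (illustrative), an INT show function can simply do
 *
 *	btf_show_type_value(show, "%d", v);
 *
 * and the macro emits something like ".refcount = (int)2," with the
 * indentation, member/type name, delimiter and newline all handled
 * here rather than by the caller.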
*/ #define btf_show_type_value(show, fmt, value) \ do { \ if ((value) != (__typeof__(value))0 || \ (show->flags & BTF_SHOW_ZERO) || \ show->state.depth == 0) { \ btf_show(show, "%s%s" fmt "%s%s", \ btf_show_indent(show), \ btf_show_name(show), \ value, btf_show_delim(show), \ btf_show_newline(show)); \ if (show->state.depth > show->state.depth_to_show) \ show->state.depth_to_show = show->state.depth; \ } \ } while (0) #define btf_show_type_values(show, fmt, ...) \ do { \ btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show), \ btf_show_name(show), \ __VA_ARGS__, btf_show_delim(show), \ btf_show_newline(show)); \ if (show->state.depth > show->state.depth_to_show) \ show->state.depth_to_show = show->state.depth; \ } while (0) /* How much is left to copy to safe buffer after @data? */ static int btf_show_obj_size_left(struct btf_show *show, void *data) { return show->obj.head + show->obj.size - data; } /* Is object pointed to by @data of @size already copied to our safe buffer? */ static bool btf_show_obj_is_safe(struct btf_show *show, void *data, int size) { return data >= show->obj.data && (data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE); } /* * If object pointed to by @data of @size falls within our safe buffer, return * the equivalent pointer to the same safe data. Assumes * copy_from_kernel_nofault() has already happened and our safe buffer is * populated. */ static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size) { if (btf_show_obj_is_safe(show, data, size)) return show->obj.safe + (data - show->obj.data); return NULL; } /* * Return a safe-to-access version of data pointed to by @data. * We do this by copying the relevant amount of information * to the struct btf_show obj.safe buffer using copy_from_kernel_nofault(). * * If BTF_SHOW_UNSAFE is specified, just return data as-is; no * safe copy is needed. * * Otherwise we need to determine if we have the required amount * of data (determined by the @data pointer and the size of the * largest base type we can encounter (represented by * BTF_SHOW_OBJ_BASE_TYPE_SIZE). Having that much data ensures * that we will be able to print some of the current object, * and if more is needed a copy will be triggered. * Some objects such as structs will not fit into the buffer; * in such cases additional copies when we iterate over their * members may be needed. * * btf_show_obj_safe() is used to return a safe buffer for * btf_show_start_type(); this ensures that as we recurse into * nested types we always have safe data for the given type. * This approach is somewhat wasteful; it's possible for example * that when iterating over a large union we'll end up copying the * same data repeatedly, but the goal is safety not performance. * We use stack data as opposed to per-CPU buffers because the * iteration over a type can take some time, and preemption handling * would greatly complicate use of the safe buffer. */ static void *btf_show_obj_safe(struct btf_show *show, const struct btf_type *t, void *data) { const struct btf_type *rt; int size_left, size; void *safe = NULL; if (show->flags & BTF_SHOW_UNSAFE) return data; rt = btf_resolve_size(show->btf, t, &size); if (IS_ERR(rt)) { show->state.status = PTR_ERR(rt); return NULL; } /* * Is this toplevel object? If so, set total object size and * initialize pointers. Otherwise check if we still fall within * our safe object data. 
*/ if (show->state.depth == 0) { show->obj.size = size; show->obj.head = data; } else { /* * If the size of the current object is > our remaining * safe buffer we _may_ need to do a new copy. However * consider the case of a nested struct; it's size pushes * us over the safe buffer limit, but showing any individual * struct members does not. In such cases, we don't need * to initiate a fresh copy yet; however we definitely need * at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left * in our buffer, regardless of the current object size. * The logic here is that as we resolve types we will * hit a base type at some point, and we need to be sure * the next chunk of data is safely available to display * that type info safely. We cannot rely on the size of * the current object here because it may be much larger * than our current buffer (e.g. task_struct is 8k). * All we want to do here is ensure that we can print the * next basic type, which we can if either * - the current type size is within the safe buffer; or * - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in * the safe buffer. */ safe = __btf_show_obj_safe(show, data, min(size, BTF_SHOW_OBJ_BASE_TYPE_SIZE)); } /* * We need a new copy to our safe object, either because we haven't * yet copied and are initializing safe data, or because the data * we want falls outside the boundaries of the safe object. */ if (!safe) { size_left = btf_show_obj_size_left(show, data); if (size_left > BTF_SHOW_OBJ_SAFE_SIZE) size_left = BTF_SHOW_OBJ_SAFE_SIZE; show->state.status = copy_from_kernel_nofault(show->obj.safe, data, size_left); if (!show->state.status) { show->obj.data = data; safe = show->obj.safe; } } return safe; } /* * Set the type we are starting to show and return a safe data pointer * to be used for showing the associated data. 
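 *
 * A typical kind show handler brackets its output with this pair,
 * along the lines of (sketch only):
 *
 *	safe_data = btf_show_start_type(show, t, type_id, data);
 *	if (!safe_data)
 *		return;
 *	... show the value found at safe_data ...
 *	btf_show_end_type(show);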
*/ static void *btf_show_start_type(struct btf_show *show, const struct btf_type *t, u32 type_id, void *data) { show->state.type = t; show->state.type_id = type_id; show->state.name[0] = '\0'; return btf_show_obj_safe(show, t, data); } static void btf_show_end_type(struct btf_show *show) { show->state.type = NULL; show->state.type_id = 0; show->state.name[0] = '\0'; } static void *btf_show_start_aggr_type(struct btf_show *show, const struct btf_type *t, u32 type_id, void *data) { void *safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return safe_data; btf_show(show, "%s%s%s", btf_show_indent(show), btf_show_name(show), btf_show_newline(show)); show->state.depth++; return safe_data; } static void btf_show_end_aggr_type(struct btf_show *show, const char *suffix) { show->state.depth--; btf_show(show, "%s%s%s%s", btf_show_indent(show), suffix, btf_show_delim(show), btf_show_newline(show)); btf_show_end_type(show); } static void btf_show_start_member(struct btf_show *show, const struct btf_member *m) { show->state.member = m; } static void btf_show_start_array_member(struct btf_show *show) { show->state.array_member = 1; btf_show_start_member(show, NULL); } static void btf_show_end_member(struct btf_show *show) { show->state.member = NULL; } static void btf_show_end_array_member(struct btf_show *show) { show->state.array_member = 0; btf_show_end_member(show); } static void *btf_show_start_array_type(struct btf_show *show, const struct btf_type *t, u32 type_id, u16 array_encoding, void *data) { show->state.array_encoding = array_encoding; show->state.array_terminated = 0; return btf_show_start_aggr_type(show, t, type_id, data); } static void btf_show_end_array_type(struct btf_show *show) { show->state.array_encoding = 0; show->state.array_terminated = 0; btf_show_end_aggr_type(show, "]"); } static void *btf_show_start_struct_type(struct btf_show *show, const struct btf_type *t, u32 type_id, void *data) { return btf_show_start_aggr_type(show, t, type_id, data); } static void btf_show_end_struct_type(struct btf_show *show) { btf_show_end_aggr_type(show, "}"); } __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, const char *fmt, ...) { va_list args; va_start(args, fmt); bpf_verifier_vlog(log, fmt, args); va_end(args); } __printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env, const char *fmt, ...) { struct bpf_verifier_log *log = &env->log; va_list args; if (!bpf_verifier_log_needed(log)) return; va_start(args, fmt); bpf_verifier_vlog(log, fmt, args); va_end(args); } __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, const struct btf_type *t, bool log_details, const char *fmt, ...) { struct bpf_verifier_log *log = &env->log; struct btf *btf = env->btf; va_list args; if (!bpf_verifier_log_needed(log)) return; if (log->level == BPF_LOG_KERNEL) { /* btf verifier prints all types it is processing via * btf_verifier_log_type(..., fmt = NULL). * Skip those prints for in-kernel BTF verification. */ if (!fmt) return; /* Skip logging when loading module BTF with mismatches permitted */ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) return; } __btf_verifier_log(log, "[%u] %s %s%s", env->log_type_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off), log_details ? 
" " : ""); if (log_details) btf_type_ops(t)->log_details(env, t); if (fmt && *fmt) { __btf_verifier_log(log, " "); va_start(args, fmt); bpf_verifier_vlog(log, fmt, args); va_end(args); } __btf_verifier_log(log, "\n"); } #define btf_verifier_log_type(env, t, ...) \ __btf_verifier_log_type((env), (t), true, __VA_ARGS__) #define btf_verifier_log_basic(env, t, ...) \ __btf_verifier_log_type((env), (t), false, __VA_ARGS__) __printf(4, 5) static void btf_verifier_log_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const char *fmt, ...) { struct bpf_verifier_log *log = &env->log; struct btf *btf = env->btf; va_list args; if (!bpf_verifier_log_needed(log)) return; if (log->level == BPF_LOG_KERNEL) { if (!fmt) return; /* Skip logging when loading module BTF with mismatches permitted */ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) return; } /* The CHECK_META phase already did a btf dump. * * If member is logged again, it must hit an error in * parsing this member. It is useful to print out which * struct this member belongs to. */ if (env->phase != CHECK_META) btf_verifier_log_type(env, struct_type, NULL); if (btf_type_kflag(struct_type)) __btf_verifier_log(log, "\t%s type_id=%u bitfield_size=%u bits_offset=%u", __btf_name_by_offset(btf, member->name_off), member->type, BTF_MEMBER_BITFIELD_SIZE(member->offset), BTF_MEMBER_BIT_OFFSET(member->offset)); else __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", __btf_name_by_offset(btf, member->name_off), member->type, member->offset); if (fmt && *fmt) { __btf_verifier_log(log, " "); va_start(args, fmt); bpf_verifier_vlog(log, fmt, args); va_end(args); } __btf_verifier_log(log, "\n"); } __printf(4, 5) static void btf_verifier_log_vsi(struct btf_verifier_env *env, const struct btf_type *datasec_type, const struct btf_var_secinfo *vsi, const char *fmt, ...) 
{ struct bpf_verifier_log *log = &env->log; va_list args; if (!bpf_verifier_log_needed(log)) return; if (log->level == BPF_LOG_KERNEL && !fmt) return; if (env->phase != CHECK_META) btf_verifier_log_type(env, datasec_type, NULL); __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u", vsi->type, vsi->offset, vsi->size); if (fmt && *fmt) { __btf_verifier_log(log, " "); va_start(args, fmt); bpf_verifier_vlog(log, fmt, args); va_end(args); } __btf_verifier_log(log, "\n"); } static void btf_verifier_log_hdr(struct btf_verifier_env *env, u32 btf_data_size) { struct bpf_verifier_log *log = &env->log; const struct btf *btf = env->btf; const struct btf_header *hdr; if (!bpf_verifier_log_needed(log)) return; if (log->level == BPF_LOG_KERNEL) return; hdr = &btf->hdr; __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic); __btf_verifier_log(log, "version: %u\n", hdr->version); __btf_verifier_log(log, "flags: 0x%x\n", hdr->flags); __btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len); __btf_verifier_log(log, "type_off: %u\n", hdr->type_off); __btf_verifier_log(log, "type_len: %u\n", hdr->type_len); __btf_verifier_log(log, "str_off: %u\n", hdr->str_off); __btf_verifier_log(log, "str_len: %u\n", hdr->str_len); __btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size); } static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) { struct btf *btf = env->btf; if (btf->types_size == btf->nr_types) { /* Expand 'types' array */ struct btf_type **new_types; u32 expand_by, new_size; if (btf->start_id + btf->types_size == BTF_MAX_TYPE) { btf_verifier_log(env, "Exceeded max num of types"); return -E2BIG; } expand_by = max_t(u32, btf->types_size >> 2, 16); new_size = min_t(u32, BTF_MAX_TYPE, btf->types_size + expand_by); new_types = kvcalloc(new_size, sizeof(*new_types), GFP_KERNEL | __GFP_NOWARN); if (!new_types) return -ENOMEM; if (btf->nr_types == 0) { if (!btf->base_btf) { /* lazily init VOID type */ new_types[0] = &btf_void; btf->nr_types++; } } else { memcpy(new_types, btf->types, sizeof(*btf->types) * btf->nr_types); } kvfree(btf->types); btf->types = new_types; btf->types_size = new_size; } btf->types[btf->nr_types++] = t; return 0; } static int btf_alloc_id(struct btf *btf) { int id; idr_preload(GFP_KERNEL); spin_lock_bh(&btf_idr_lock); id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC); if (id > 0) btf->id = id; spin_unlock_bh(&btf_idr_lock); idr_preload_end(); if (WARN_ON_ONCE(!id)) return -ENOSPC; return id > 0 ? 0 : id; } static void btf_free_id(struct btf *btf) { unsigned long flags; /* * In map-in-map, calling map_delete_elem() on outer * map will call bpf_map_put on the inner map. * It will then eventually call btf_free_id() * on the inner map. Some of the map_delete_elem() * implementation may have irq disabled, so * we need to use the _irqsave() version instead * of the _bh() version. */ spin_lock_irqsave(&btf_idr_lock, flags); idr_remove(&btf_idr, btf->id); spin_unlock_irqrestore(&btf_idr_lock, flags); } static void btf_free_kfunc_set_tab(struct btf *btf) { struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; int hook; if (!tab) return; /* For module BTF, we directly assign the sets being registered, so * there is nothing to free except kfunc_set_tab. 
*/ if (btf_is_module(btf)) goto free_tab; for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) kfree(tab->sets[hook]); free_tab: kfree(tab); btf->kfunc_set_tab = NULL; } static void btf_free_dtor_kfunc_tab(struct btf *btf) { struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; if (!tab) return; kfree(tab); btf->dtor_kfunc_tab = NULL; } static void btf_struct_metas_free(struct btf_struct_metas *tab) { int i; if (!tab) return; for (i = 0; i < tab->cnt; i++) btf_record_free(tab->types[i].record); kfree(tab); } static void btf_free_struct_meta_tab(struct btf *btf) { struct btf_struct_metas *tab = btf->struct_meta_tab; btf_struct_metas_free(tab); btf->struct_meta_tab = NULL; } static void btf_free(struct btf *btf) { btf_free_struct_meta_tab(btf); btf_free_dtor_kfunc_tab(btf); btf_free_kfunc_set_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); kvfree(btf->data); kfree(btf); } static void btf_free_rcu(struct rcu_head *rcu) { struct btf *btf = container_of(rcu, struct btf, rcu); btf_free(btf); } void btf_get(struct btf *btf) { refcount_inc(&btf->refcnt); } void btf_put(struct btf *btf) { if (btf && refcount_dec_and_test(&btf->refcnt)) { btf_free_id(btf); call_rcu(&btf->rcu, btf_free_rcu); } } static int env_resolve_init(struct btf_verifier_env *env) { struct btf *btf = env->btf; u32 nr_types = btf->nr_types; u32 *resolved_sizes = NULL; u32 *resolved_ids = NULL; u8 *visit_states = NULL; resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes), GFP_KERNEL | __GFP_NOWARN); if (!resolved_sizes) goto nomem; resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids), GFP_KERNEL | __GFP_NOWARN); if (!resolved_ids) goto nomem; visit_states = kvcalloc(nr_types, sizeof(*visit_states), GFP_KERNEL | __GFP_NOWARN); if (!visit_states) goto nomem; btf->resolved_sizes = resolved_sizes; btf->resolved_ids = resolved_ids; env->visit_states = visit_states; return 0; nomem: kvfree(resolved_sizes); kvfree(resolved_ids); kvfree(visit_states); return -ENOMEM; } static void btf_verifier_env_free(struct btf_verifier_env *env) { kvfree(env->visit_states); kfree(env); } static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, const struct btf_type *next_type) { switch (env->resolve_mode) { case RESOLVE_TBD: /* int, enum or void is a sink */ return !btf_type_needs_resolve(next_type); case RESOLVE_PTR: /* int, enum, void, struct, array, func or func_proto is a sink * for ptr */ return !btf_type_is_modifier(next_type) && !btf_type_is_ptr(next_type); case RESOLVE_STRUCT_OR_ARRAY: /* int, enum, void, ptr, func or func_proto is a sink * for struct and array */ return !btf_type_is_modifier(next_type) && !btf_type_is_array(next_type) && !btf_type_is_struct(next_type); default: BUG(); } } static bool env_type_is_resolved(const struct btf_verifier_env *env, u32 type_id) { /* base BTF types should be resolved by now */ if (type_id < env->btf->start_id) return true; return env->visit_states[type_id - env->btf->start_id] == RESOLVED; } static int env_stack_push(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { const struct btf *btf = env->btf; struct resolve_vertex *v; if (env->top_stack == MAX_RESOLVE_DEPTH) return -E2BIG; if (type_id < btf->start_id || env->visit_states[type_id - btf->start_id] != NOT_VISITED) return -EEXIST; env->visit_states[type_id - btf->start_id] = VISITED; v = &env->stack[env->top_stack++]; v->t = t; v->type_id = type_id; v->next_member = 0; if (env->resolve_mode == RESOLVE_TBD) { if (btf_type_is_ptr(t)) env->resolve_mode = RESOLVE_PTR; 
else if (btf_type_is_struct(t) || btf_type_is_array(t)) env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY; } return 0; } static void env_stack_set_next_member(struct btf_verifier_env *env, u16 next_member) { env->stack[env->top_stack - 1].next_member = next_member; } static void env_stack_pop_resolved(struct btf_verifier_env *env, u32 resolved_type_id, u32 resolved_size) { u32 type_id = env->stack[--(env->top_stack)].type_id; struct btf *btf = env->btf; type_id -= btf->start_id; /* adjust to local type id */ btf->resolved_sizes[type_id] = resolved_size; btf->resolved_ids[type_id] = resolved_type_id; env->visit_states[type_id] = RESOLVED; } static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) { return env->top_stack ? &env->stack[env->top_stack - 1] : NULL; } /* Resolve the size of a passed-in "type" * * type: is an array (e.g. u32 array[x][y]) * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY, * *type_size: (x * y * sizeof(u32)). Hence, *type_size always * corresponds to the return type. * *elem_type: u32 * *elem_id: id of u32 * *total_nelems: (x * y). Hence, individual elem size is * (*type_size / *total_nelems) * *type_id: id of type if it's changed within the function, 0 if not * * type: is not an array (e.g. const struct X) * return type: type "struct X" * *type_size: sizeof(struct X) * *elem_type: same as return type ("struct X") * *elem_id: 0 * *total_nelems: 1 * *type_id: id of type if it's changed within the function, 0 if not */ static const struct btf_type * __btf_resolve_size(const struct btf *btf, const struct btf_type *type, u32 *type_size, const struct btf_type **elem_type, u32 *elem_id, u32 *total_nelems, u32 *type_id) { const struct btf_type *array_type = NULL; const struct btf_array *array = NULL; u32 i, size, nelems = 1, id = 0; for (i = 0; i < MAX_RESOLVE_DEPTH; i++) { switch (BTF_INFO_KIND(type->info)) { /* type->size can be used */ case BTF_KIND_INT: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FLOAT: case BTF_KIND_ENUM64: size = type->size; goto resolved; case BTF_KIND_PTR: size = sizeof(void *); goto resolved; /* Modifiers */ case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_TYPE_TAG: id = type->type; type = btf_type_by_id(btf, type->type); break; case BTF_KIND_ARRAY: if (!array_type) array_type = type; array = btf_type_array(type); if (nelems && array->nelems > U32_MAX / nelems) return ERR_PTR(-EINVAL); nelems *= array->nelems; type = btf_type_by_id(btf, array->type); break; /* type without size */ default: return ERR_PTR(-EINVAL); } } return ERR_PTR(-EINVAL); resolved: if (nelems && size > U32_MAX / nelems) return ERR_PTR(-EINVAL); *type_size = nelems * size; if (total_nelems) *total_nelems = nelems; if (elem_type) *elem_type = type; if (elem_id) *elem_id = array ? array->type : 0; if (type_id && id) *type_id = id; return array_type ? 
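	/* outermost array type if one was walked through, else the size-bearing type itself */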
: type; } const struct btf_type * btf_resolve_size(const struct btf *btf, const struct btf_type *type, u32 *type_size) { return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL); } static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id) { while (type_id < btf->start_id) btf = btf->base_btf; return btf->resolved_ids[type_id - btf->start_id]; } /* The input param "type_id" must point to a needs_resolve type */ static const struct btf_type *btf_type_id_resolve(const struct btf *btf, u32 *type_id) { *type_id = btf_resolved_type_id(btf, *type_id); return btf_type_by_id(btf, *type_id); } static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id) { while (type_id < btf->start_id) btf = btf->base_btf; return btf->resolved_sizes[type_id - btf->start_id]; } const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size) { const struct btf_type *size_type; u32 size_type_id = *type_id; u32 size = 0; size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; if (btf_type_has_size(size_type)) { size = size_type->size; } else if (btf_type_is_array(size_type)) { size = btf_resolved_type_size(btf, size_type_id); } else if (btf_type_is_ptr(size_type)) { size = sizeof(void *); } else { if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) && !btf_type_is_var(size_type))) return NULL; size_type_id = btf_resolved_type_id(btf, size_type_id); size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; else if (btf_type_has_size(size_type)) size = size_type->size; else if (btf_type_is_array(size_type)) size = btf_resolved_type_size(btf, size_type_id); else if (btf_type_is_ptr(size_type)) size = sizeof(void *); else return NULL; } *type_id = size_type_id; if (ret_size) *ret_size = size; return size_type; } static int btf_df_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { btf_verifier_log_basic(env, struct_type, "Unsupported check_member"); return -EINVAL; } static int btf_df_check_kflag_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { btf_verifier_log_basic(env, struct_type, "Unsupported check_kflag_member"); return -EINVAL; } /* Used for ptr, array struct/union and float type members. * int, enum and modifier types have their specific callback functions. */ static int btf_generic_check_kflag_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) { btf_verifier_log_member(env, struct_type, member, "Invalid member bitfield_size"); return -EINVAL; } /* bitfield size is 0, so member->offset represents bit offset only. * It is safe to call non kflag check_member variants. 
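	 * (e.g. btf_ptr_check_member() for a pointer member).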
*/ return btf_type_ops(member_type)->check_member(env, struct_type, member, member_type); } static int btf_df_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { btf_verifier_log_basic(env, v->t, "Unsupported resolve"); return -EINVAL; } static void btf_df_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offsets, struct btf_show *show) { btf_show(show, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); } static int btf_int_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 int_data = btf_type_int(member_type); u32 struct_bits_off = member->offset; u32 struct_size = struct_type->size; u32 nr_copy_bits; u32 bytes_offset; if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) { btf_verifier_log_member(env, struct_type, member, "bits_offset exceeds U32_MAX"); return -EINVAL; } struct_bits_off += BTF_INT_OFFSET(int_data); bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = BTF_INT_BITS(int_data) + BITS_PER_BYTE_MASKED(struct_bits_off); if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, "nr_copy_bits exceeds 128"); return -EINVAL; } if (struct_size < bytes_offset || struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static int btf_int_check_kflag_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset; u32 int_data = btf_type_int(member_type); u32 struct_size = struct_type->size; u32 nr_copy_bits; /* a regular int type is required for the kflag int member */ if (!btf_type_int_is_regular(member_type)) { btf_verifier_log_member(env, struct_type, member, "Invalid member base type"); return -EINVAL; } /* check sanity of bitfield size */ nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); nr_int_data_bits = BTF_INT_BITS(int_data); if (!nr_bits) { /* Not a bitfield member, member offset must be at byte * boundary. 
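	 * (a bitfield_size of zero in the kflag encoding denotes a regular,
	 * non-bitfield member).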
*/ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Invalid member offset"); return -EINVAL; } nr_bits = nr_int_data_bits; } else if (nr_bits > nr_int_data_bits) { btf_verifier_log_member(env, struct_type, member, "Invalid member bitfield_size"); return -EINVAL; } bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, "nr_copy_bits exceeds 128"); return -EINVAL; } if (struct_size < bytes_offset || struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static s32 btf_int_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { u32 int_data, nr_bits, meta_needed = sizeof(int_data); u16 encoding; if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } int_data = btf_type_int(t); if (int_data & ~BTF_INT_MASK) { btf_verifier_log_basic(env, t, "Invalid int_data:%x", int_data); return -EINVAL; } nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); if (nr_bits > BITS_PER_U128) { btf_verifier_log_type(env, t, "nr_bits exceeds %zu", BITS_PER_U128); return -EINVAL; } if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) { btf_verifier_log_type(env, t, "nr_bits exceeds type_size"); return -EINVAL; } /* * Only one of the encoding bits is allowed and it * should be sufficient for the pretty print purpose (i.e. decoding). * Multiple bits can be allowed later if it is found * to be insufficient. */ encoding = BTF_INT_ENCODING(int_data); if (encoding && encoding != BTF_INT_SIGNED && encoding != BTF_INT_CHAR && encoding != BTF_INT_BOOL) { btf_verifier_log_type(env, t, "Unsupported encoding"); return -ENOTSUPP; } btf_verifier_log_type(env, t, NULL); return meta_needed; } static void btf_int_log(struct btf_verifier_env *env, const struct btf_type *t) { int int_data = btf_type_int(t); btf_verifier_log(env, "size=%u bits_offset=%u nr_bits=%u encoding=%s", t->size, BTF_INT_OFFSET(int_data), BTF_INT_BITS(int_data), btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } static void btf_int128_print(struct btf_show *show, void *data) { /* data points to a __int128 number. 
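	 * There is no printk format for a full __int128, so the value is
	 * emitted as two 64-bit halves.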
* Suppose * int128_num = *(__int128 *)data; * The below formulas shows what upper_num and lower_num represents: * upper_num = int128_num >> 64; * lower_num = int128_num & 0xffffffffFFFFFFFFULL; */ u64 upper_num, lower_num; #ifdef __BIG_ENDIAN_BITFIELD upper_num = *(u64 *)data; lower_num = *(u64 *)(data + 8); #else upper_num = *(u64 *)(data + 8); lower_num = *(u64 *)data; #endif if (upper_num == 0) btf_show_type_value(show, "0x%llx", lower_num); else btf_show_type_values(show, "0x%llx%016llx", upper_num, lower_num); } static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, u16 right_shift_bits) { u64 upper_num, lower_num; #ifdef __BIG_ENDIAN_BITFIELD upper_num = print_num[0]; lower_num = print_num[1]; #else upper_num = print_num[1]; lower_num = print_num[0]; #endif /* shake out un-needed bits by shift/or operations */ if (left_shift_bits >= 64) { upper_num = lower_num << (left_shift_bits - 64); lower_num = 0; } else { upper_num = (upper_num << left_shift_bits) | (lower_num >> (64 - left_shift_bits)); lower_num = lower_num << left_shift_bits; } if (right_shift_bits >= 64) { lower_num = upper_num >> (right_shift_bits - 64); upper_num = 0; } else { lower_num = (lower_num >> right_shift_bits) | (upper_num << (64 - right_shift_bits)); upper_num = upper_num >> right_shift_bits; } #ifdef __BIG_ENDIAN_BITFIELD print_num[0] = upper_num; print_num[1] = lower_num; #else print_num[0] = lower_num; print_num[1] = upper_num; #endif } static void btf_bitfield_show(void *data, u8 bits_offset, u8 nr_bits, struct btf_show *show) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; u8 nr_copy_bits; u64 print_num[2] = {}; nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); memcpy(print_num, data, nr_copy_bytes); #ifdef __BIG_ENDIAN_BITFIELD left_shift_bits = bits_offset; #else left_shift_bits = BITS_PER_U128 - nr_copy_bits; #endif right_shift_bits = BITS_PER_U128 - nr_bits; btf_int128_shift(print_num, left_shift_bits, right_shift_bits); btf_int128_print(show, print_num); } static void btf_int_bits_show(const struct btf *btf, const struct btf_type *t, void *data, u8 bits_offset, struct btf_show *show) { u32 int_data = btf_type_int(t); u8 nr_bits = BTF_INT_BITS(int_data); u8 total_bits_offset; /* * bits_offset is at most 7. * BTF_INT_OFFSET() cannot exceed 128 bits. 
*/ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); btf_bitfield_show(data, bits_offset, nr_bits, show); } static void btf_int_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { u32 int_data = btf_type_int(t); u8 encoding = BTF_INT_ENCODING(int_data); bool sign = encoding & BTF_INT_SIGNED; u8 nr_bits = BTF_INT_BITS(int_data); void *safe_data; safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return; if (bits_offset || BTF_INT_OFFSET(int_data) || BITS_PER_BYTE_MASKED(nr_bits)) { btf_int_bits_show(btf, t, safe_data, bits_offset, show); goto out; } switch (nr_bits) { case 128: btf_int128_print(show, safe_data); break; case 64: if (sign) btf_show_type_value(show, "%lld", *(s64 *)safe_data); else btf_show_type_value(show, "%llu", *(u64 *)safe_data); break; case 32: if (sign) btf_show_type_value(show, "%d", *(s32 *)safe_data); else btf_show_type_value(show, "%u", *(u32 *)safe_data); break; case 16: if (sign) btf_show_type_value(show, "%d", *(s16 *)safe_data); else btf_show_type_value(show, "%u", *(u16 *)safe_data); break; case 8: if (show->state.array_encoding == BTF_INT_CHAR) { /* check for null terminator */ if (show->state.array_terminated) break; if (*(char *)data == '\0') { show->state.array_terminated = 1; break; } if (isprint(*(char *)data)) { btf_show_type_value(show, "'%c'", *(char *)safe_data); break; } } if (sign) btf_show_type_value(show, "%d", *(s8 *)safe_data); else btf_show_type_value(show, "%u", *(u8 *)safe_data); break; default: btf_int_bits_show(btf, t, safe_data, bits_offset, show); break; } out: btf_show_end_type(show); } static const struct btf_kind_operations int_ops = { .check_meta = btf_int_check_meta, .resolve = btf_df_resolve, .check_member = btf_int_check_member, .check_kflag_member = btf_int_check_kflag_member, .log_details = btf_int_log, .show = btf_int_show, }; static int btf_modifier_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { const struct btf_type *resolved_type; u32 resolved_type_id = member->type; struct btf_member resolved_member; struct btf *btf = env->btf; resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); if (!resolved_type) { btf_verifier_log_member(env, struct_type, member, "Invalid member"); return -EINVAL; } resolved_member = *member; resolved_member.type = resolved_type_id; return btf_type_ops(resolved_type)->check_member(env, struct_type, &resolved_member, resolved_type); } static int btf_modifier_check_kflag_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { const struct btf_type *resolved_type; u32 resolved_type_id = member->type; struct btf_member resolved_member; struct btf *btf = env->btf; resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); if (!resolved_type) { btf_verifier_log_member(env, struct_type, member, "Invalid member"); return -EINVAL; } resolved_member = *member; resolved_member.type = resolved_type_id; return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type, &resolved_member, resolved_type); } static int btf_ptr_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_size, struct_bits_off, 
bytes_offset; struct_size = struct_type->size; struct_bits_off = member->offset; bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Member is not byte aligned"); return -EINVAL; } if (struct_size - bytes_offset < sizeof(void *)) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static int btf_ref_type_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const char *value; if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } if (!BTF_TYPE_ID_VALID(t->type)) { btf_verifier_log_type(env, t, "Invalid type_id"); return -EINVAL; } /* typedef/type_tag type must have a valid name, and other ref types, * volatile, const, restrict, should have a null name. */ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { if (!t->name_off || !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) { value = btf_name_by_offset(env->btf, t->name_off); if (!value || !value[0]) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } } else { if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } } btf_verifier_log_type(env, t, NULL); return 0; } static int btf_modifier_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *t = v->t; const struct btf_type *next_type; u32 next_type_id = t->type; struct btf *btf = env->btf; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || btf_type_is_resolve_source_only(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); /* Figure out the resolved next_type_id with size. * They will be stored in the current modifier's * resolved_ids and resolved_sizes such that it can * save us a few type-following when we use it later (e.g. in * pretty print). 
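	 * e.g. for a 'const volatile int' chain, the const's resolved_ids
	 * entry ends up pointing straight at the underlying int.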
*/ if (!btf_type_id_size(btf, &next_type_id, NULL)) { if (env_type_is_resolved(env, next_type_id)) next_type = btf_type_id_resolve(btf, &next_type_id); /* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && !btf_type_is_fwd(next_type) && !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } } env_stack_pop_resolved(env, next_type_id, 0); return 0; } static int btf_var_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; const struct btf_type *t = v->t; u32 next_type_id = t->type; struct btf *btf = env->btf; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || btf_type_is_resolve_source_only(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); if (btf_type_is_modifier(next_type)) { const struct btf_type *resolved_type; u32 resolved_type_id; resolved_type_id = next_type_id; resolved_type = btf_type_id_resolve(btf, &resolved_type_id); if (btf_type_is_ptr(resolved_type) && !env_type_is_resolve_sink(env, resolved_type) && !env_type_is_resolved(env, resolved_type_id)) return env_stack_push(env, resolved_type, resolved_type_id); } /* We must resolve to something concrete at this point, no * forward types or similar that would resolve to size of * zero is allowed. */ if (!btf_type_id_size(btf, &next_type_id, NULL)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } env_stack_pop_resolved(env, next_type_id, 0); return 0; } static int btf_ptr_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; const struct btf_type *t = v->t; u32 next_type_id = t->type; struct btf *btf = env->btf; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || btf_type_is_resolve_source_only(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); /* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY, * the modifier may have stopped resolving when it was resolved * to a ptr (last-resolved-ptr). * * We now need to continue from the last-resolved-ptr to * ensure the last-resolved-ptr will not referring back to * the current ptr (t). 
*/ if (btf_type_is_modifier(next_type)) { const struct btf_type *resolved_type; u32 resolved_type_id; resolved_type_id = next_type_id; resolved_type = btf_type_id_resolve(btf, &resolved_type_id); if (btf_type_is_ptr(resolved_type) && !env_type_is_resolve_sink(env, resolved_type) && !env_type_is_resolved(env, resolved_type_id)) return env_stack_push(env, resolved_type, resolved_type_id); } if (!btf_type_id_size(btf, &next_type_id, NULL)) { if (env_type_is_resolved(env, next_type_id)) next_type = btf_type_id_resolve(btf, &next_type_id); if (!btf_type_is_void(next_type) && !btf_type_is_fwd(next_type) && !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } } env_stack_pop_resolved(env, next_type_id, 0); return 0; } static void btf_modifier_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { if (btf->resolved_ids) t = btf_type_id_resolve(btf, &type_id); else t = btf_type_skip_modifiers(btf, type_id, NULL); btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show); } static void btf_var_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { t = btf_type_id_resolve(btf, &type_id); btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show); } static void btf_ptr_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { void *safe_data; safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return; /* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */ if (show->flags & BTF_SHOW_PTR_RAW) btf_show_type_value(show, "0x%px", *(void **)safe_data); else btf_show_type_value(show, "0x%p", *(void **)safe_data); btf_show_end_type(show); } static void btf_ref_type_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "type_id=%u", t->type); } static struct btf_kind_operations modifier_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_modifier_resolve, .check_member = btf_modifier_check_member, .check_kflag_member = btf_modifier_check_kflag_member, .log_details = btf_ref_type_log, .show = btf_modifier_show, }; static struct btf_kind_operations ptr_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_ptr_resolve, .check_member = btf_ptr_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_ref_type_log, .show = btf_ptr_show, }; static s32 btf_fwd_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (t->type) { btf_verifier_log_type(env, t, "type != 0"); return -EINVAL; } /* fwd type must have a valid name */ if (!t->name_off || !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return 0; } static void btf_fwd_type_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "%s", btf_type_kflag(t) ? 
"union" : "struct"); } static struct btf_kind_operations fwd_ops = { .check_meta = btf_fwd_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_fwd_type_log, .show = btf_df_show, }; static int btf_array_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_bits_off = member->offset; u32 struct_size, bytes_offset; u32 array_type_id, array_size; struct btf *btf = env->btf; if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Member is not byte aligned"); return -EINVAL; } array_type_id = member->type; btf_type_id_size(btf, &array_type_id, &array_size); struct_size = struct_type->size; bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); if (struct_size - bytes_offset < array_size) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static s32 btf_array_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_array *array = btf_type_array(t); u32 meta_needed = sizeof(*array); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } /* array type should not have a name */ if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } if (t->size) { btf_verifier_log_type(env, t, "size != 0"); return -EINVAL; } /* Array elem type and index type cannot be in type void, * so !array->type and !array->index_type are not allowed. 
*/ if (!array->type || !BTF_TYPE_ID_VALID(array->type)) { btf_verifier_log_type(env, t, "Invalid elem"); return -EINVAL; } if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) { btf_verifier_log_type(env, t, "Invalid index"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return meta_needed; } static int btf_array_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_array *array = btf_type_array(v->t); const struct btf_type *elem_type, *index_type; u32 elem_type_id, index_type_id; struct btf *btf = env->btf; u32 elem_size; /* Check array->index_type */ index_type_id = array->index_type; index_type = btf_type_by_id(btf, index_type_id); if (btf_type_nosize_or_null(index_type) || btf_type_is_resolve_source_only(index_type)) { btf_verifier_log_type(env, v->t, "Invalid index"); return -EINVAL; } if (!env_type_is_resolve_sink(env, index_type) && !env_type_is_resolved(env, index_type_id)) return env_stack_push(env, index_type, index_type_id); index_type = btf_type_id_size(btf, &index_type_id, NULL); if (!index_type || !btf_type_is_int(index_type) || !btf_type_int_is_regular(index_type)) { btf_verifier_log_type(env, v->t, "Invalid index"); return -EINVAL; } /* Check array->type */ elem_type_id = array->type; elem_type = btf_type_by_id(btf, elem_type_id); if (btf_type_nosize_or_null(elem_type) || btf_type_is_resolve_source_only(elem_type)) { btf_verifier_log_type(env, v->t, "Invalid elem"); return -EINVAL; } if (!env_type_is_resolve_sink(env, elem_type) && !env_type_is_resolved(env, elem_type_id)) return env_stack_push(env, elem_type, elem_type_id); elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); if (!elem_type) { btf_verifier_log_type(env, v->t, "Invalid elem"); return -EINVAL; } if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) { btf_verifier_log_type(env, v->t, "Invalid array of int"); return -EINVAL; } if (array->nelems && elem_size > U32_MAX / array->nelems) { btf_verifier_log_type(env, v->t, "Array size overflows U32_MAX"); return -EINVAL; } env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems); return 0; } static void btf_array_log(struct btf_verifier_env *env, const struct btf_type *t) { const struct btf_array *array = btf_type_array(t); btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u", array->type, array->index_type, array->nelems); } static void __btf_array_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_array *array = btf_type_array(t); const struct btf_kind_operations *elem_ops; const struct btf_type *elem_type; u32 i, elem_size = 0, elem_type_id; u16 encoding = 0; elem_type_id = array->type; elem_type = btf_type_skip_modifiers(btf, elem_type_id, NULL); if (elem_type && btf_type_has_size(elem_type)) elem_size = elem_type->size; if (elem_type && btf_type_is_int(elem_type)) { u32 int_type = btf_type_int(elem_type); encoding = BTF_INT_ENCODING(int_type); /* * BTF_INT_CHAR encoding never seems to be set for * char arrays, so if size is 1 and element is * printable as a char, we'll do that. 
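	 * This lets a member such as 'char comm[16]' be shown as characters
	 * rather than as a list of small integers.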
*/ if (elem_size == 1) encoding = BTF_INT_CHAR; } if (!btf_show_start_array_type(show, t, type_id, encoding, data)) return; if (!elem_type) goto out; elem_ops = btf_type_ops(elem_type); for (i = 0; i < array->nelems; i++) { btf_show_start_array_member(show); elem_ops->show(btf, elem_type, elem_type_id, data, bits_offset, show); data += elem_size; btf_show_end_array_member(show); if (show->state.array_terminated) break; } out: btf_show_end_array_type(show); } static void btf_array_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_member *m = show->state.member; /* * First check if any members would be shown (are non-zero). * See comments above "struct btf_show" definition for more * details on how this works at a high-level. */ if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { if (!show->state.depth_check) { show->state.depth_check = show->state.depth + 1; show->state.depth_to_show = 0; } __btf_array_show(btf, t, type_id, data, bits_offset, show); show->state.member = m; if (show->state.depth_check != show->state.depth + 1) return; show->state.depth_check = 0; if (show->state.depth_to_show <= show->state.depth) return; /* * Reaching here indicates we have recursed and found * non-zero array member(s). */ } __btf_array_show(btf, t, type_id, data, bits_offset, show); } static struct btf_kind_operations array_ops = { .check_meta = btf_array_check_meta, .resolve = btf_array_resolve, .check_member = btf_array_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_array_log, .show = btf_array_show, }; static int btf_struct_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_bits_off = member->offset; u32 struct_size, bytes_offset; if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Member is not byte aligned"); return -EINVAL; } struct_size = struct_type->size; bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); if (struct_size - bytes_offset < member_type->size) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static s32 btf_struct_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; const struct btf_member *member; u32 meta_needed, last_offset; struct btf *btf = env->btf; u32 struct_size = t->size; u32 offset; u16 i; meta_needed = btf_type_vlen(t) * sizeof(*member); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } /* struct type either no name or a valid one */ if (t->name_off && !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); last_offset = 0; for_each_member(i, t, member) { if (!btf_name_offset_valid(btf, member->name_off)) { btf_verifier_log_member(env, t, member, "Invalid member name_offset:%u", member->name_off); return -EINVAL; } /* struct member either no name or a valid one */ if (member->name_off && !btf_name_valid_identifier(btf, member->name_off)) { btf_verifier_log_member(env, t, member, "Invalid name"); return -EINVAL; } /* A member cannot be in type void */ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) { btf_verifier_log_member(env, t, member, 
"Invalid type_id"); return -EINVAL; } offset = __btf_member_bit_offset(t, member); if (is_union && offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL; } /* * ">" instead of ">=" because the last member could be * "char a[0];" */ if (last_offset > offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL; } if (BITS_ROUNDUP_BYTES(offset) > struct_size) { btf_verifier_log_member(env, t, member, "Member bits_offset exceeds its struct size"); return -EINVAL; } btf_verifier_log_member(env, t, member, NULL); last_offset = offset; } return meta_needed; } static int btf_struct_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_member *member; int err; u16 i; /* Before continue resolving the next_member, * ensure the last member is indeed resolved to a * type with size info. */ if (v->next_member) { const struct btf_type *last_member_type; const struct btf_member *last_member; u32 last_member_type_id; last_member = btf_type_member(v->t) + v->next_member - 1; last_member_type_id = last_member->type; if (WARN_ON_ONCE(!env_type_is_resolved(env, last_member_type_id))) return -EINVAL; last_member_type = btf_type_by_id(env->btf, last_member_type_id); if (btf_type_kflag(v->t)) err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t, last_member, last_member_type); else err = btf_type_ops(last_member_type)->check_member(env, v->t, last_member, last_member_type); if (err) return err; } for_each_member_from(i, v->next_member, v->t, member) { u32 member_type_id = member->type; const struct btf_type *member_type = btf_type_by_id(env->btf, member_type_id); if (btf_type_nosize_or_null(member_type) || btf_type_is_resolve_source_only(member_type)) { btf_verifier_log_member(env, v->t, member, "Invalid member"); return -EINVAL; } if (!env_type_is_resolve_sink(env, member_type) && !env_type_is_resolved(env, member_type_id)) { env_stack_set_next_member(env, i + 1); return env_stack_push(env, member_type, member_type_id); } if (btf_type_kflag(v->t)) err = btf_type_ops(member_type)->check_kflag_member(env, v->t, member, member_type); else err = btf_type_ops(member_type)->check_member(env, v->t, member, member_type); if (err) return err; } env_stack_pop_resolved(env, 0, 0); return 0; } static void btf_struct_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } enum { BTF_FIELD_IGNORE = 0, BTF_FIELD_FOUND = 1, }; struct btf_field_info { enum btf_field_type type; u32 off; union { struct { u32 type_id; } kptr; struct { const char *node_name; u32 value_btf_id; } graph_root; }; }; static int btf_find_struct(const struct btf *btf, const struct btf_type *t, u32 off, int sz, enum btf_field_type field_type, struct btf_field_info *info) { if (!__btf_type_is_struct(t)) return BTF_FIELD_IGNORE; if (t->size != sz) return BTF_FIELD_IGNORE; info->type = field_type; info->off = off; return BTF_FIELD_FOUND; } static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, u32 off, int sz, struct btf_field_info *info) { enum btf_field_type type; u32 res_id; /* Permit modifiers on the pointer itself */ if (btf_type_is_volatile(t)) t = btf_type_by_id(btf, t->type); /* For PTR, sz is always == 8 */ if (!btf_type_is_ptr(t)) return BTF_FIELD_IGNORE; t = btf_type_by_id(btf, t->type); if (!btf_type_is_type_tag(t)) return BTF_FIELD_IGNORE; /* Reject extra tags */ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) return 
-EINVAL; if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_UNREF; else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_REF; else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_PERCPU; else return -EINVAL; /* Get the base type */ t = btf_type_skip_modifiers(btf, t->type, &res_id); /* Only pointer to struct is allowed */ if (!__btf_type_is_struct(t)) return -EINVAL; info->type = type; info->off = off; info->kptr.type_id = res_id; return BTF_FIELD_FOUND; } const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key) { const char *value = NULL; int i; for (i = 1; i < btf_nr_types(btf); i++) { const struct btf_type *t = btf_type_by_id(btf, i); int len = strlen(tag_key); if (!btf_type_is_decl_tag(t)) continue; if (pt != btf_type_by_id(btf, t->type) || btf_type_decl_tag(t)->component_idx != comp_idx) continue; if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) continue; /* Prevent duplicate entries for same type */ if (value) return ERR_PTR(-EEXIST); value = __btf_name_by_offset(btf, t->name_off) + len; } if (!value) return ERR_PTR(-ENOENT); return value; } static int btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, const struct btf_type *t, int comp_idx, u32 off, int sz, struct btf_field_info *info, enum btf_field_type head_type) { const char *node_field_name; const char *value_type; s32 id; if (!__btf_type_is_struct(t)) return BTF_FIELD_IGNORE; if (t->size != sz) return BTF_FIELD_IGNORE; value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); if (IS_ERR(value_type)) return -EINVAL; node_field_name = strstr(value_type, ":"); if (!node_field_name) return -EINVAL; value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN); if (!value_type) return -ENOMEM; id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); kfree(value_type); if (id < 0) return id; node_field_name++; if (str_is_empty(node_field_name)) return -EINVAL; info->type = head_type; info->off = off; info->graph_root.value_btf_id = id; info->graph_root.node_name = node_field_name; return BTF_FIELD_FOUND; } #define field_mask_test_name(field_type, field_type_str) \ if (field_mask & field_type && !strcmp(name, field_type_str)) { \ type = field_type; \ goto end; \ } static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, int *align, int *sz) { int type = 0; if (field_mask & BPF_SPIN_LOCK) { if (!strcmp(name, "bpf_spin_lock")) { if (*seen_mask & BPF_SPIN_LOCK) return -E2BIG; *seen_mask |= BPF_SPIN_LOCK; type = BPF_SPIN_LOCK; goto end; } } if (field_mask & BPF_TIMER) { if (!strcmp(name, "bpf_timer")) { if (*seen_mask & BPF_TIMER) return -E2BIG; *seen_mask |= BPF_TIMER; type = BPF_TIMER; goto end; } } field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); field_mask_test_name(BPF_RB_NODE, "bpf_rb_node"); field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); /* Only return BPF_KPTR when all other types with matchable names fail */ if (field_mask & BPF_KPTR) { type = BPF_KPTR_REF; goto end; } return 0; end: *sz = btf_field_type_size(type); *align = btf_field_type_align(type); return type; } #undef field_mask_test_name static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, u32 field_mask, struct btf_field_info *info, int info_cnt) { int ret, idx = 0, 
align, sz, field_type; const struct btf_member *member; struct btf_field_info tmp; u32 i, off, seen_mask = 0; for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); field_type = btf_get_field_type(__btf_name_by_offset(btf, member_type->name_off), field_mask, &seen_mask, &align, &sz); if (field_type == 0) continue; if (field_type < 0) return field_type; off = __btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ return -EINVAL; off /= 8; if (off % align) continue; switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: ret = btf_find_struct(btf, member_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, member_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; case BPF_LIST_HEAD: case BPF_RB_ROOT: ret = btf_find_graph_root(btf, t, member_type, i, off, sz, idx < info_cnt ? &info[idx] : &tmp, field_type); if (ret < 0) return ret; break; default: return -EFAULT; } if (ret == BTF_FIELD_IGNORE) continue; if (idx >= info_cnt) return -E2BIG; ++idx; } return idx; } static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, u32 field_mask, struct btf_field_info *info, int info_cnt) { int ret, idx = 0, align, sz, field_type; const struct btf_var_secinfo *vsi; struct btf_field_info tmp; u32 i, off, seen_mask = 0; for_each_vsi(i, t, vsi) { const struct btf_type *var = btf_type_by_id(btf, vsi->type); const struct btf_type *var_type = btf_type_by_id(btf, var->type); field_type = btf_get_field_type(__btf_name_by_offset(btf, var_type->name_off), field_mask, &seen_mask, &align, &sz); if (field_type == 0) continue; if (field_type < 0) return field_type; off = vsi->offset; if (vsi->size != sz) continue; if (off % align) continue; switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: ret = btf_find_struct(btf, var_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, var_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; case BPF_LIST_HEAD: case BPF_RB_ROOT: ret = btf_find_graph_root(btf, var, var_type, -1, off, sz, idx < info_cnt ? &info[idx] : &tmp, field_type); if (ret < 0) return ret; break; default: return -EFAULT; } if (ret == BTF_FIELD_IGNORE) continue; if (idx >= info_cnt) return -E2BIG; ++idx; } return idx; } static int btf_find_field(const struct btf *btf, const struct btf_type *t, u32 field_mask, struct btf_field_info *info, int info_cnt) { if (__btf_type_is_struct(t)) return btf_find_struct_field(btf, t, field_mask, info, info_cnt); else if (btf_type_is_datasec(t)) return btf_find_datasec_var(btf, t, field_mask, info, info_cnt); return -EINVAL; } static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { struct module *mod = NULL; const struct btf_type *t; /* If a matching btf type is found in kernel or module BTFs, kptr_ref * is that BTF, otherwise it's program BTF */ struct btf *kptr_btf; int ret; s32 id; /* Find type in map BTF, and use it to look up the matching type * in vmlinux or module BTFs, by name and kind. 
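	 * e.g. for a map value declared (BPF program side, illustrative) as
	 *   struct val { struct task_struct __kptr *task; };
	 * the "kptr" type tag leads here and 'struct task_struct' is looked
	 * up in vmlinux BTF by name and kind.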
*/ t = btf_type_by_id(btf, info->kptr.type_id); id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), &kptr_btf); if (id == -ENOENT) { /* btf_parse_kptr should only be called w/ btf = program BTF */ WARN_ON_ONCE(btf_is_kernel(btf)); /* Type exists only in program BTF. Assume that it's a MEM_ALLOC * kptr allocated via bpf_obj_new */ field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; btf_get(kptr_btf); goto found_dtor; } if (id < 0) return id; /* Find and stash the function pointer for the destruction function that * needs to be eventually invoked from the map free path. */ if (info->type == BPF_KPTR_REF) { const struct btf_type *dtor_func; const char *dtor_func_name; unsigned long addr; s32 dtor_btf_id; /* This call also serves as a whitelist of allowed objects that * can be used as a referenced pointer and be stored in a map at * the same time. */ dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id); if (dtor_btf_id < 0) { ret = dtor_btf_id; goto end_btf; } dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id); if (!dtor_func) { ret = -ENOENT; goto end_btf; } if (btf_is_module(kptr_btf)) { mod = btf_try_get_module(kptr_btf); if (!mod) { ret = -ENXIO; goto end_btf; } } /* We already verified dtor_func to be btf_type_is_func * in register_btf_id_dtor_kfuncs. */ dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off); addr = kallsyms_lookup_name(dtor_func_name); if (!addr) { ret = -EINVAL; goto end_mod; } field->kptr.dtor = (void *)addr; } found_dtor: field->kptr.btf_id = id; field->kptr.btf = kptr_btf; field->kptr.module = mod; return 0; end_mod: module_put(mod); end_btf: btf_put(kptr_btf); return ret; } static int btf_parse_graph_root(const struct btf *btf, struct btf_field *field, struct btf_field_info *info, const char *node_type_name, size_t node_type_align) { const struct btf_type *t, *n = NULL; const struct btf_member *member; u32 offset; int i; t = btf_type_by_id(btf, info->graph_root.value_btf_id); /* We've already checked that value_btf_id is a struct type. We * just need to figure out the offset of the list_node, and * verify its type. 
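	 * e.g. for a map value declared (BPF program side, illustrative) as
	 *   struct elem { struct bpf_list_node node; };
	 *   struct val  { struct bpf_list_head head __contains(elem, node); };
	 * the decl tag is "contains:elem:node", so member 'node' of struct
	 * elem is located and its byte offset recorded.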
*/ for_each_member(i, t, member) { if (strcmp(info->graph_root.node_name, __btf_name_by_offset(btf, member->name_off))) continue; /* Invalid BTF, two members with same name */ if (n) return -EINVAL; n = btf_type_by_id(btf, member->type); if (!__btf_type_is_struct(n)) return -EINVAL; if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off))) return -EINVAL; offset = __btf_member_bit_offset(n, member); if (offset % 8) return -EINVAL; offset /= 8; if (offset % node_type_align) return -EINVAL; field->graph_root.btf = (struct btf *)btf; field->graph_root.value_btf_id = info->graph_root.value_btf_id; field->graph_root.node_offset = offset; } if (!n) return -ENOENT; return 0; } static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { return btf_parse_graph_root(btf, field, info, "bpf_list_node", __alignof__(struct bpf_list_node)); } static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { return btf_parse_graph_root(btf, field, info, "bpf_rb_node", __alignof__(struct bpf_rb_node)); } static int btf_field_cmp(const void *_a, const void *_b, const void *priv) { const struct btf_field *a = (const struct btf_field *)_a; const struct btf_field *b = (const struct btf_field *)_b; if (a->offset < b->offset) return -1; else if (a->offset > b->offset) return 1; return 0; } struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; u32 next_off = 0, field_type_size; struct btf_record *rec; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); if (ret < 0) return ERR_PTR(ret); if (!ret) return NULL; cnt = ret; /* This needs to be kzalloc to zero out padding and unused fields, see * comment in btf_record_equal. 
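	 * (records are compared there with memcmp, so padding must be zeroed).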
*/ rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM); rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) { field_type_size = btf_field_type_size(info_arr[i].type); if (info_arr[i].off + field_type_size > value_size) { WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); ret = -EFAULT; goto end; } if (info_arr[i].off < next_off) { ret = -EEXIST; goto end; } next_off = info_arr[i].off + field_type_size; rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; rec->fields[i].type = info_arr[i].type; rec->fields[i].size = field_type_size; switch (info_arr[i].type) { case BPF_SPIN_LOCK: WARN_ON_ONCE(rec->spin_lock_off >= 0); /* Cache offset for faster lookup at runtime */ rec->spin_lock_off = rec->fields[i].offset; break; case BPF_TIMER: WARN_ON_ONCE(rec->timer_off >= 0); /* Cache offset for faster lookup at runtime */ rec->timer_off = rec->fields[i].offset; break; case BPF_REFCOUNT: WARN_ON_ONCE(rec->refcount_off >= 0); /* Cache offset for faster lookup at runtime */ rec->refcount_off = rec->fields[i].offset; break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; break; case BPF_LIST_HEAD: ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; break; case BPF_RB_ROOT: ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; break; case BPF_LIST_NODE: case BPF_RB_NODE: break; default: ret = -EFAULT; goto end; } rec->cnt++; } /* bpf_{list_head, rb_node} require bpf_spin_lock */ if ((btf_record_has_field(rec, BPF_LIST_HEAD) || btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) { ret = -EINVAL; goto end; } if (rec->refcount_off < 0 && btf_record_has_field(rec, BPF_LIST_NODE) && btf_record_has_field(rec, BPF_RB_NODE)) { ret = -EINVAL; goto end; } sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp, NULL, rec); return rec; end: btf_record_free(rec); return ERR_PTR(ret); } #define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT) #define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE) int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) { int i; /* There are three types that signify ownership of some other type: * kptr_ref, bpf_list_head, bpf_rb_root. * kptr_ref only supports storing kernel types, which can't store * references to program allocated local types. * * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK)) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; u32 btf_id; if (!(rec->fields[i].type & GRAPH_ROOT_MASK)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); if (!meta) return -EFAULT; rec->fields[i].graph_root.value_rec = meta->record; /* We need to set value_rec for all root types, but no need * to check ownership cycle for a type unless it's also a * node type. */ if (!(rec->field_mask & GRAPH_NODE_MASK)) continue; /* We need to ensure ownership acyclicity among all types. The * proper way to do it would be to topologically sort all BTF * IDs based on the ownership edges, since there can be multiple * bpf_{list_head,rb_node} in a type. 
Instead, we use the * following reasoning: * * - A type can only be owned by another type in user BTF if it * has a bpf_{list,rb}_node. Let's call these node types. * - A type can only _own_ another type in user BTF if it has a * bpf_{list_head,rb_root}. Let's call these root types. * * We ensure that if a type is both a root and node, its * element types cannot be root types. * * To ensure acyclicity: * * When A is a root type but not a node, its ownership * chain can be: * A -> B -> C * Where: * - A is a root, e.g. has bpf_rb_root. * - B is both a root and node, e.g. has bpf_rb_node and * bpf_list_head. * - C is only a node, e.g. has bpf_list_node. * * When A is both a root and node, some other type already * owns it in the BTF domain, hence it cannot own * another root type through any of the ownership edges. * A -> B * Where: * - A is both a root and node. * - B is only a node. */ if (meta->record->field_mask & GRAPH_ROOT_MASK) return -ELOOP; } return 0; } static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_member *member; void *safe_data; u32 i; safe_data = btf_show_start_struct_type(show, t, type_id, data); if (!safe_data) return; for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); const struct btf_kind_operations *ops; u32 member_offset, bitfield_size; u32 bytes_offset; u8 bits8_offset; btf_show_start_member(show, member); member_offset = __btf_member_bit_offset(t, member); bitfield_size = __btf_member_bitfield_size(t, member); bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); bits8_offset = BITS_PER_BYTE_MASKED(member_offset); if (bitfield_size) { safe_data = btf_show_start_type(show, member_type, member->type, data + bytes_offset); if (safe_data) btf_bitfield_show(safe_data, bits8_offset, bitfield_size, show); btf_show_end_type(show); } else { ops = btf_type_ops(member_type); ops->show(btf, member_type, member->type, data + bytes_offset, bits8_offset, show); } btf_show_end_member(show); } btf_show_end_struct_type(show); } static void btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_member *m = show->state.member; /* * First check if any members would be shown (are non-zero). * See comments above "struct btf_show" definition for more * details on how this works at a high-level. */ if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { if (!show->state.depth_check) { show->state.depth_check = show->state.depth + 1; show->state.depth_to_show = 0; } __btf_struct_show(btf, t, type_id, data, bits_offset, show); /* Restore saved member data here */ show->state.member = m; if (show->state.depth_check != show->state.depth + 1) return; show->state.depth_check = 0; if (show->state.depth_to_show <= show->state.depth) return; /* * Reaching here indicates we have recursed and found * non-zero child values.
*/ } __btf_struct_show(btf, t, type_id, data, bits_offset, show); } static struct btf_kind_operations struct_ops = { .check_meta = btf_struct_check_meta, .resolve = btf_struct_resolve, .check_member = btf_struct_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_struct_log, .show = btf_struct_show, }; static int btf_enum_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_bits_off = member->offset; u32 struct_size, bytes_offset; if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Member is not byte aligned"); return -EINVAL; } struct_size = struct_type->size; bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); if (struct_size - bytes_offset < member_type->size) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static int btf_enum_check_kflag_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u32 struct_bits_off, nr_bits, bytes_end, struct_size; u32 int_bitsize = sizeof(int) * BITS_PER_BYTE; struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); if (!nr_bits) { if (BITS_PER_BYTE_MASKED(struct_bits_off)) { btf_verifier_log_member(env, struct_type, member, "Member is not byte aligned"); return -EINVAL; } nr_bits = int_bitsize; } else if (nr_bits > int_bitsize) { btf_verifier_log_member(env, struct_type, member, "Invalid member bitfield_size"); return -EINVAL; } struct_size = struct_type->size; bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits); if (struct_size < bytes_end) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static s32 btf_enum_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_enum *enums = btf_type_enum(t); struct btf *btf = env->btf; const char *fmt_str; u16 i, nr_enums; u32 meta_needed; nr_enums = btf_type_vlen(t); meta_needed = nr_enums * sizeof(*enums); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (t->size > 8 || !is_power_of_2(t->size)) { btf_verifier_log_type(env, t, "Unexpected size"); return -EINVAL; } /* enum type either no name or a valid one */ if (t->name_off && !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); for (i = 0; i < nr_enums; i++) { if (!btf_name_offset_valid(btf, enums[i].name_off)) { btf_verifier_log(env, "\tInvalid name_offset:%u", enums[i].name_off); return -EINVAL; } /* enum member must have a valid name */ if (!enums[i].name_off || !btf_name_valid_identifier(btf, enums[i].name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } if (env->log.level == BPF_LOG_KERNEL) continue; fmt_str = btf_type_kflag(t) ? 
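	/* kind_flag set means the enum values are signed */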
"\t%s val=%d\n" : "\t%s val=%u\n"; btf_verifier_log(env, fmt_str, __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); } return meta_needed; } static void btf_enum_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } static void btf_enum_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_enum *enums = btf_type_enum(t); u32 i, nr_enums = btf_type_vlen(t); void *safe_data; int v; safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return; v = *(int *)safe_data; for (i = 0; i < nr_enums; i++) { if (v != enums[i].val) continue; btf_show_type_value(show, "%s", __btf_name_by_offset(btf, enums[i].name_off)); btf_show_end_type(show); return; } if (btf_type_kflag(t)) btf_show_type_value(show, "%d", v); else btf_show_type_value(show, "%u", v); btf_show_end_type(show); } static struct btf_kind_operations enum_ops = { .check_meta = btf_enum_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, .check_kflag_member = btf_enum_check_kflag_member, .log_details = btf_enum_log, .show = btf_enum_show, }; static s32 btf_enum64_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_enum64 *enums = btf_type_enum64(t); struct btf *btf = env->btf; const char *fmt_str; u16 i, nr_enums; u32 meta_needed; nr_enums = btf_type_vlen(t); meta_needed = nr_enums * sizeof(*enums); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (t->size > 8 || !is_power_of_2(t->size)) { btf_verifier_log_type(env, t, "Unexpected size"); return -EINVAL; } /* enum type either no name or a valid one */ if (t->name_off && !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); for (i = 0; i < nr_enums; i++) { if (!btf_name_offset_valid(btf, enums[i].name_off)) { btf_verifier_log(env, "\tInvalid name_offset:%u", enums[i].name_off); return -EINVAL; } /* enum member must have a valid name */ if (!enums[i].name_off || !btf_name_valid_identifier(btf, enums[i].name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } if (env->log.level == BPF_LOG_KERNEL) continue; fmt_str = btf_type_kflag(t) ? 
"\t%s val=%lld\n" : "\t%s val=%llu\n"; btf_verifier_log(env, fmt_str, __btf_name_by_offset(btf, enums[i].name_off), btf_enum64_value(enums + i)); } return meta_needed; } static void btf_enum64_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_enum64 *enums = btf_type_enum64(t); u32 i, nr_enums = btf_type_vlen(t); void *safe_data; s64 v; safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return; v = *(u64 *)safe_data; for (i = 0; i < nr_enums; i++) { if (v != btf_enum64_value(enums + i)) continue; btf_show_type_value(show, "%s", __btf_name_by_offset(btf, enums[i].name_off)); btf_show_end_type(show); return; } if (btf_type_kflag(t)) btf_show_type_value(show, "%lld", v); else btf_show_type_value(show, "%llu", v); btf_show_end_type(show); } static struct btf_kind_operations enum64_ops = { .check_meta = btf_enum64_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, .check_kflag_member = btf_enum_check_kflag_member, .log_details = btf_enum_log, .show = btf_enum64_show, }; static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return meta_needed; } static void btf_func_proto_log(struct btf_verifier_env *env, const struct btf_type *t) { const struct btf_param *args = (const struct btf_param *)(t + 1); u16 nr_args = btf_type_vlen(t), i; btf_verifier_log(env, "return=%u args=(", t->type); if (!nr_args) { btf_verifier_log(env, "void"); goto done; } if (nr_args == 1 && !args[0].type) { /* Only one vararg */ btf_verifier_log(env, "vararg"); goto done; } btf_verifier_log(env, "%u %s", args[0].type, __btf_name_by_offset(env->btf, args[0].name_off)); for (i = 1; i < nr_args - 1; i++) btf_verifier_log(env, ", %u %s", args[i].type, __btf_name_by_offset(env->btf, args[i].name_off)); if (nr_args > 1) { const struct btf_param *last_arg = &args[nr_args - 1]; if (last_arg->type) btf_verifier_log(env, ", %u %s", last_arg->type, __btf_name_by_offset(env->btf, last_arg->name_off)); else btf_verifier_log(env, ", vararg"); } done: btf_verifier_log(env, ")"); } static struct btf_kind_operations func_proto_ops = { .check_meta = btf_func_proto_check_meta, .resolve = btf_df_resolve, /* * BTF_KIND_FUNC_PROTO cannot be directly referred by * a struct's member. * * It should be a function pointer instead. * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) * * Hence, there is no btf_func_check_member(). 
*/ .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_func_proto_log, .show = btf_df_show, }; static s32 btf_func_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { if (!t->name_off || !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } if (btf_type_vlen(t) > BTF_FUNC_GLOBAL) { btf_verifier_log_type(env, t, "Invalid func linkage"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return 0; } static int btf_func_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *t = v->t; u32 next_type_id = t->type; int err; err = btf_func_check(env, t); if (err) return err; env_stack_pop_resolved(env, next_type_id, 0); return 0; } static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, .show = btf_df_show, }; static s32 btf_var_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_var *var; u32 meta_needed = sizeof(*var); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } if (!t->name_off || !__btf_name_valid(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } /* A var cannot be in type void */ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) { btf_verifier_log_type(env, t, "Invalid type_id"); return -EINVAL; } var = btf_type_var(t); if (var->linkage != BTF_VAR_STATIC && var->linkage != BTF_VAR_GLOBAL_ALLOCATED) { btf_verifier_log_type(env, t, "Linkage not supported"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return meta_needed; } static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t) { const struct btf_var *var = btf_type_var(t); btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage); } static const struct btf_kind_operations var_ops = { .check_meta = btf_var_check_meta, .resolve = btf_var_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_var_log, .show = btf_var_show, }; static s32 btf_datasec_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_var_secinfo *vsi; u64 last_vsi_end_off = 0, sum = 0; u32 i, meta_needed; meta_needed = btf_type_vlen(t) * sizeof(*vsi); if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } if (!t->size) { btf_verifier_log_type(env, t, "size == 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } if (!t->name_off || !btf_name_valid_section(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); for_each_vsi(i, t, vsi) { /* A var cannot be in type void */ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) { btf_verifier_log_vsi(env, t, vsi, "Invalid type_id"); return 
-EINVAL; } if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) { btf_verifier_log_vsi(env, t, vsi, "Invalid offset"); return -EINVAL; } if (!vsi->size || vsi->size > t->size) { btf_verifier_log_vsi(env, t, vsi, "Invalid size"); return -EINVAL; } last_vsi_end_off = vsi->offset + vsi->size; if (last_vsi_end_off > t->size) { btf_verifier_log_vsi(env, t, vsi, "Invalid offset+size"); return -EINVAL; } btf_verifier_log_vsi(env, t, vsi, NULL); sum += vsi->size; } if (t->size < sum) { btf_verifier_log_type(env, t, "Invalid btf_info size"); return -EINVAL; } return meta_needed; } static int btf_datasec_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_var_secinfo *vsi; struct btf *btf = env->btf; u16 i; env->resolve_mode = RESOLVE_TBD; for_each_vsi_from(i, v->next_member, v->t, vsi) { u32 var_type_id = vsi->type, type_id, type_size = 0; const struct btf_type *var_type = btf_type_by_id(env->btf, var_type_id); if (!var_type || !btf_type_is_var(var_type)) { btf_verifier_log_vsi(env, v->t, vsi, "Not a VAR kind member"); return -EINVAL; } if (!env_type_is_resolve_sink(env, var_type) && !env_type_is_resolved(env, var_type_id)) { env_stack_set_next_member(env, i + 1); return env_stack_push(env, var_type, var_type_id); } type_id = var_type->type; if (!btf_type_id_size(btf, &type_id, &type_size)) { btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); return -EINVAL; } if (vsi->size < type_size) { btf_verifier_log_vsi(env, v->t, vsi, "Invalid size"); return -EINVAL; } } env_stack_pop_resolved(env, 0, 0); return 0; } static void btf_datasec_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } static void btf_datasec_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) { const struct btf_var_secinfo *vsi; const struct btf_type *var; u32 i; if (!btf_show_start_type(show, t, type_id, data)) return; btf_show_type_value(show, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off)); for_each_vsi(i, t, vsi) { var = btf_type_by_id(btf, vsi->type); if (i) btf_show(show, ","); btf_type_ops(var)->show(btf, var, vsi->type, data + vsi->offset, bits_offset, show); } btf_show_end_type(show); } static const struct btf_kind_operations datasec_ops = { .check_meta = btf_datasec_check_meta, .resolve = btf_datasec_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_datasec_log, .show = btf_datasec_show, }; static s32 btf_float_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 && t->size != 16) { btf_verifier_log_type(env, t, "Invalid type_size"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return 0; } static int btf_float_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type) { u64 start_offset_bytes; u64 end_offset_bytes; u64 misalign_bits; u64 align_bytes; u64 align_bits; /* Different architectures have different alignment requirements, so * here we check only for the reasonable minimum. This way we ensure * that types after CO-RE can pass the kernel BTF verifier. 
*/ align_bytes = min_t(u64, sizeof(void *), member_type->size); align_bits = align_bytes * BITS_PER_BYTE; div64_u64_rem(member->offset, align_bits, &misalign_bits); if (misalign_bits) { btf_verifier_log_member(env, struct_type, member, "Member is not properly aligned"); return -EINVAL; } start_offset_bytes = member->offset / BITS_PER_BYTE; end_offset_bytes = start_offset_bytes + member_type->size; if (end_offset_bytes > struct_type->size) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; } return 0; } static void btf_float_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "size=%u", t->size); } static const struct btf_kind_operations float_ops = { .check_meta = btf_float_check_meta, .resolve = btf_df_resolve, .check_member = btf_float_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_float_log, .show = btf_df_show, }; static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { const struct btf_decl_tag *tag; u32 meta_needed = sizeof(*tag); s32 component_idx; const char *value; if (meta_left < meta_needed) { btf_verifier_log_basic(env, t, "meta_left:%u meta_needed:%u", meta_left, meta_needed); return -EINVAL; } value = btf_name_by_offset(env->btf, t->name_off); if (!value || !value[0]) { btf_verifier_log_type(env, t, "Invalid value"); return -EINVAL; } if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; } if (btf_type_kflag(t)) { btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); return -EINVAL; } component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx < -1) { btf_verifier_log_type(env, t, "Invalid component_idx"); return -EINVAL; } btf_verifier_log_type(env, t, NULL); return meta_needed; } static int btf_decl_tag_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; const struct btf_type *t = v->t; u32 next_type_id = t->type; struct btf *btf = env->btf; s32 component_idx; u32 vlen; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || !btf_type_is_decl_tag_target(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } if (!env_type_is_resolve_sink(env, next_type) && !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL; } if (btf_type_is_struct(next_type)) { vlen = btf_type_vlen(next_type); } else { /* next_type should be a function */ next_type = btf_type_by_id(btf, next_type->type); vlen = btf_type_vlen(next_type); } if ((u32)component_idx >= vlen) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL; } } env_stack_pop_resolved(env, next_type_id, 0); return 0; } static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "type=%u component_idx=%d", t->type, btf_type_decl_tag(t)->component_idx); } static const struct btf_kind_operations decl_tag_ops = { .check_meta = btf_decl_tag_check_meta, .resolve = btf_decl_tag_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_decl_tag_log, .show = btf_df_show, }; static int btf_func_proto_check(struct btf_verifier_env *env, const struct btf_type 
*t) { const struct btf_type *ret_type; const struct btf_param *args; const struct btf *btf; u16 nr_args, i; int err; btf = env->btf; args = (const struct btf_param *)(t + 1); nr_args = btf_type_vlen(t); /* Check func return type which could be "void" (t->type == 0) */ if (t->type) { u32 ret_type_id = t->type; ret_type = btf_type_by_id(btf, ret_type_id); if (!ret_type) { btf_verifier_log_type(env, t, "Invalid return type"); return -EINVAL; } if (btf_type_is_resolve_source_only(ret_type)) { btf_verifier_log_type(env, t, "Invalid return type"); return -EINVAL; } if (btf_type_needs_resolve(ret_type) && !env_type_is_resolved(env, ret_type_id)) { err = btf_resolve(env, ret_type, ret_type_id); if (err) return err; } /* Ensure the return type is a type that has a size */ if (!btf_type_id_size(btf, &ret_type_id, NULL)) { btf_verifier_log_type(env, t, "Invalid return type"); return -EINVAL; } } if (!nr_args) return 0; /* Last func arg type_id could be 0 if it is a vararg */ if (!args[nr_args - 1].type) { if (args[nr_args - 1].name_off) { btf_verifier_log_type(env, t, "Invalid arg#%u", nr_args); return -EINVAL; } nr_args--; } for (i = 0; i < nr_args; i++) { const struct btf_type *arg_type; u32 arg_type_id; arg_type_id = args[i].type; arg_type = btf_type_by_id(btf, arg_type_id); if (!arg_type) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL; } if (btf_type_is_resolve_source_only(arg_type)) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL; } if (args[i].name_off && (!btf_name_offset_valid(btf, args[i].name_off) || !btf_name_valid_identifier(btf, args[i].name_off))) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL; } if (btf_type_needs_resolve(arg_type) && !env_type_is_resolved(env, arg_type_id)) { err = btf_resolve(env, arg_type, arg_type_id); if (err) return err; } if (!btf_type_id_size(btf, &arg_type_id, NULL)) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL; } } return 0; } static int btf_func_check(struct btf_verifier_env *env, const struct btf_type *t) { const struct btf_type *proto_type; const struct btf_param *args; const struct btf *btf; u16 nr_args, i; btf = env->btf; proto_type = btf_type_by_id(btf, t->type); if (!proto_type || !btf_type_is_func_proto(proto_type)) { btf_verifier_log_type(env, t, "Invalid type_id"); return -EINVAL; } args = (const struct btf_param *)(proto_type + 1); nr_args = btf_type_vlen(proto_type); for (i = 0; i < nr_args; i++) { if (!args[i].name_off && args[i].type) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL; } } return 0; } static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_INT] = &int_ops, [BTF_KIND_PTR] = &ptr_ops, [BTF_KIND_ARRAY] = &array_ops, [BTF_KIND_STRUCT] = &struct_ops, [BTF_KIND_UNION] = &struct_ops, [BTF_KIND_ENUM] = &enum_ops, [BTF_KIND_FWD] = &fwd_ops, [BTF_KIND_TYPEDEF] = &modifier_ops, [BTF_KIND_VOLATILE] = &modifier_ops, [BTF_KIND_CONST] = &modifier_ops, [BTF_KIND_RESTRICT] = &modifier_ops, [BTF_KIND_FUNC] = &func_ops, [BTF_KIND_FUNC_PROTO] = &func_proto_ops, [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, [BTF_KIND_DECL_TAG] = &decl_tag_ops, [BTF_KIND_TYPE_TAG] = &modifier_ops, [BTF_KIND_ENUM64] = &enum64_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { u32 saved_meta_left = meta_left; s32 var_meta_size; if (meta_left < sizeof(*t)) { btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu", 
env->log_type_id, meta_left, sizeof(*t)); return -EINVAL; } meta_left -= sizeof(*t); if (t->info & ~BTF_INFO_MASK) { btf_verifier_log(env, "[%u] Invalid btf_info:%x", env->log_type_id, t->info); return -EINVAL; } if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { btf_verifier_log(env, "[%u] Invalid kind:%u", env->log_type_id, BTF_INFO_KIND(t->info)); return -EINVAL; } if (!btf_name_offset_valid(env->btf, t->name_off)) { btf_verifier_log(env, "[%u] Invalid name_offset:%u", env->log_type_id, t->name_off); return -EINVAL; } var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left); if (var_meta_size < 0) return var_meta_size; meta_left -= var_meta_size; return saved_meta_left - meta_left; } static int btf_check_all_metas(struct btf_verifier_env *env) { struct btf *btf = env->btf; struct btf_header *hdr; void *cur, *end; hdr = &btf->hdr; cur = btf->nohdr_data + hdr->type_off; end = cur + hdr->type_len; env->log_type_id = btf->base_btf ? btf->start_id : 1; while (cur < end) { struct btf_type *t = cur; s32 meta_size; meta_size = btf_check_meta(env, t, end - cur); if (meta_size < 0) return meta_size; btf_add_type(env, t); cur += meta_size; env->log_type_id++; } return 0; } static bool btf_resolve_valid(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { struct btf *btf = env->btf; if (!env_type_is_resolved(env, type_id)) return false; if (btf_type_is_struct(t) || btf_type_is_datasec(t)) return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_var(t)) { t = btf_type_id_resolve(btf, &type_id); return t && !btf_type_is_modifier(t) && !btf_type_is_var(t) && !btf_type_is_datasec(t); } if (btf_type_is_array(t)) { const struct btf_array *array = btf_type_array(t); const struct btf_type *elem_type; u32 elem_type_id = array->type; u32 elem_size; elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); return elem_type && !btf_type_is_modifier(elem_type) && (array->nelems * elem_size == btf_resolved_type_size(btf, type_id)); } return false; } static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { u32 save_log_type_id = env->log_type_id; const struct resolve_vertex *v; int err = 0; env->resolve_mode = RESOLVE_TBD; env_stack_push(env, t, type_id); while (!err && (v = env_stack_peak(env))) { env->log_type_id = v->type_id; err = btf_type_ops(v->t)->resolve(env, v); } env->log_type_id = type_id; if (err == -E2BIG) { btf_verifier_log_type(env, t, "Exceeded max resolving depth:%u", MAX_RESOLVE_DEPTH); } else if (err == -EEXIST) { btf_verifier_log_type(env, t, "Loop detected"); } /* Final sanity check */ if (!err && !btf_resolve_valid(env, t, type_id)) { btf_verifier_log_type(env, t, "Invalid resolve state"); err = -EINVAL; } env->log_type_id = save_log_type_id; return err; } static int btf_check_all_types(struct btf_verifier_env *env) { struct btf *btf = env->btf; const struct btf_type *t; u32 type_id, i; int err; err = env_resolve_init(env); if (err) return err; env->phase++; for (i = btf->base_btf ? 
0 : 1; i < btf->nr_types; i++) { type_id = btf->start_id + i; t = btf_type_by_id(btf, type_id); env->log_type_id = type_id; if (btf_type_needs_resolve(t) && !env_type_is_resolved(env, type_id)) { err = btf_resolve(env, t, type_id); if (err) return err; } if (btf_type_is_func_proto(t)) { err = btf_func_proto_check(env, t); if (err) return err; } } return 0; } static int btf_parse_type_sec(struct btf_verifier_env *env) { const struct btf_header *hdr = &env->btf->hdr; int err; /* Type section must align to 4 bytes */ if (hdr->type_off & (sizeof(u32) - 1)) { btf_verifier_log(env, "Unaligned type_off"); return -EINVAL; } if (!env->btf->base_btf && !hdr->type_len) { btf_verifier_log(env, "No type found"); return -EINVAL; } err = btf_check_all_metas(env); if (err) return err; return btf_check_all_types(env); } static int btf_parse_str_sec(struct btf_verifier_env *env) { const struct btf_header *hdr; struct btf *btf = env->btf; const char *start, *end; hdr = &btf->hdr; start = btf->nohdr_data + hdr->str_off; end = start + hdr->str_len; if (end != btf->data + btf->data_size) { btf_verifier_log(env, "String section is not at the end"); return -EINVAL; } btf->strings = start; if (btf->base_btf && !hdr->str_len) return 0; if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) { btf_verifier_log(env, "Invalid string section"); return -EINVAL; } if (!btf->base_btf && start[0]) { btf_verifier_log(env, "Invalid string section"); return -EINVAL; } return 0; } static const size_t btf_sec_info_offset[] = { offsetof(struct btf_header, type_off), offsetof(struct btf_header, str_off), }; static int btf_sec_info_cmp(const void *a, const void *b) { const struct btf_sec_info *x = a; const struct btf_sec_info *y = b; return (int)(x->off - y->off) ? : (int)(x->len - y->len); } static int btf_check_sec_info(struct btf_verifier_env *env, u32 btf_data_size) { struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)]; u32 total, expected_total, i; const struct btf_header *hdr; const struct btf *btf; btf = env->btf; hdr = &btf->hdr; /* Populate the secs from hdr */ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) secs[i] = *(struct btf_sec_info *)((void *)hdr + btf_sec_info_offset[i]); sort(secs, ARRAY_SIZE(btf_sec_info_offset), sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL); /* Check for gaps and overlap among sections */ total = 0; expected_total = btf_data_size - hdr->hdr_len; for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) { if (expected_total < secs[i].off) { btf_verifier_log(env, "Invalid section offset"); return -EINVAL; } if (total < secs[i].off) { /* gap */ btf_verifier_log(env, "Unsupported section found"); return -EINVAL; } if (total > secs[i].off) { btf_verifier_log(env, "Section overlap found"); return -EINVAL; } if (expected_total - total < secs[i].len) { btf_verifier_log(env, "Total section length too long"); return -EINVAL; } total += secs[i].len; } /* There is data other than hdr and known sections */ if (expected_total != total) { btf_verifier_log(env, "Unsupported section found"); return -EINVAL; } return 0; } static int btf_parse_hdr(struct btf_verifier_env *env) { u32 hdr_len, hdr_copy, btf_data_size; const struct btf_header *hdr; struct btf *btf; btf = env->btf; btf_data_size = btf->data_size; if (btf_data_size < offsetofend(struct btf_header, hdr_len)) { btf_verifier_log(env, "hdr_len not found"); return -EINVAL; } hdr = btf->data; hdr_len = hdr->hdr_len; if (btf_data_size < hdr_len) { btf_verifier_log(env, "btf_header not found"); return -EINVAL; } /* Ensure the 
unsupported header fields are zero */ if (hdr_len > sizeof(btf->hdr)) { u8 *expected_zero = btf->data + sizeof(btf->hdr); u8 *end = btf->data + hdr_len; for (; expected_zero < end; expected_zero++) { if (*expected_zero) { btf_verifier_log(env, "Unsupported btf_header"); return -E2BIG; } } } hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr)); memcpy(&btf->hdr, btf->data, hdr_copy); hdr = &btf->hdr; btf_verifier_log_hdr(env, btf_data_size); if (hdr->magic != BTF_MAGIC) { btf_verifier_log(env, "Invalid magic"); return -EINVAL; } if (hdr->version != BTF_VERSION) { btf_verifier_log(env, "Unsupported version"); return -ENOTSUPP; } if (hdr->flags) { btf_verifier_log(env, "Unsupported flags"); return -ENOTSUPP; } if (!btf->base_btf && btf_data_size == hdr->hdr_len) { btf_verifier_log(env, "No data"); return -EINVAL; } return btf_check_sec_info(env, btf_data_size); } static const char *alloc_obj_fields[] = { "bpf_spin_lock", "bpf_list_head", "bpf_list_node", "bpf_rb_root", "bpf_rb_node", "bpf_refcount", }; static struct btf_struct_metas * btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) { union { struct btf_id_set set; struct { u32 _cnt; u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; } _arr; } aof; struct btf_struct_metas *tab = NULL; int i, n, id, ret; BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32)); memset(&aof, 0, sizeof(aof)); for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { /* Try to find whether this special type exists in user BTF, and * if so remember its ID so we can easily find it among members * of structs that we iterate in the next loop. */ id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); if (id < 0) continue; aof.set.ids[aof.set.cnt++] = id; } if (!aof.set.cnt) return NULL; sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); n = btf_nr_types(btf); for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; struct btf_struct_meta *type; struct btf_record *record; const struct btf_type *t; int j, tab_cnt; t = btf_type_by_id(btf, i); if (!t) { ret = -EINVAL; goto free; } if (!__btf_type_is_struct(t)) continue; cond_resched(); for_each_member(j, t, member) { if (btf_id_set_contains(&aof.set, member->type)) goto parse; } continue; parse: tab_cnt = tab ? 
tab->cnt : 0; new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]), GFP_KERNEL | __GFP_NOWARN); if (!new_tab) { ret = -ENOMEM; goto free; } if (!tab) new_tab->cnt = 0; tab = new_tab; type = &tab->types[tab->cnt]; type->btf_id = i; record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; goto free; } type->record = record; tab->cnt++; } return tab; free: btf_struct_metas_free(tab); return ERR_PTR(ret); } struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) { struct btf_struct_metas *tab; BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0); tab = btf->struct_meta_tab; if (!tab) return NULL; return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func); } static int btf_check_type_tags(struct btf_verifier_env *env, struct btf *btf, int start_id) { int i, n, good_id = start_id - 1; bool in_tags; n = btf_nr_types(btf); for (i = start_id; i < n; i++) { const struct btf_type *t; int chain_limit = 32; u32 cur_id = i; t = btf_type_by_id(btf, i); if (!t) return -EINVAL; if (!btf_type_is_modifier(t)) continue; cond_resched(); in_tags = btf_type_is_type_tag(t); while (btf_type_is_modifier(t)) { if (!chain_limit--) { btf_verifier_log(env, "Max chain length or cycle detected"); return -ELOOP; } if (btf_type_is_type_tag(t)) { if (!in_tags) { btf_verifier_log(env, "Type tags don't precede modifiers"); return -EINVAL; } } else if (in_tags) { in_tags = false; } if (cur_id <= good_id) break; /* Move to next type */ cur_id = t->type; t = btf_type_by_id(btf, cur_id); if (!t) return -EINVAL; } good_id = i; } return 0; } static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) { u32 log_true_size; int err; err = bpf_vlog_finalize(log, &log_true_size); if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), &log_true_size, sizeof(log_true_size))) err = -EFAULT; return err; } static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel); char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct btf *btf = NULL; u8 *data; int err, ret; if (attr->btf_size > BTF_MAX_SIZE) return ERR_PTR(-E2BIG); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) return ERR_PTR(-ENOMEM); /* user could have requested verbose verifier output * and supplied buffer to store the verification trace */ err = bpf_vlog_init(&env->log, attr->btf_log_level, log_ubuf, attr->btf_log_size); if (err) goto errout_free; btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; goto errout; } env->btf = btf; data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN); if (!data) { err = -ENOMEM; goto errout; } btf->data = data; btf->data_size = attr->btf_size; if (copy_from_bpfptr(data, btf_data, attr->btf_size)) { err = -EFAULT; goto errout; } err = btf_parse_hdr(env); if (err) goto errout; btf->nohdr_data = btf->data + btf->hdr.hdr_len; err = btf_parse_str_sec(env); if (err) goto errout; err = btf_parse_type_sec(env); if (err) goto errout; err = btf_check_type_tags(env, btf, 1); if (err) goto errout; struct_meta_tab = 
btf_parse_struct_metas(&env->log, btf); if (IS_ERR(struct_meta_tab)) { err = PTR_ERR(struct_meta_tab); goto errout; } btf->struct_meta_tab = struct_meta_tab; if (struct_meta_tab) { int i; for (i = 0; i < struct_meta_tab->cnt; i++) { err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record); if (err < 0) goto errout_meta; } } err = finalize_log(&env->log, uattr, uattr_size); if (err) goto errout_free; btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; errout_meta: btf_free_struct_meta_tab(btf); errout: /* overwrite err with -ENOSPC or -EFAULT */ ret = finalize_log(&env->log, uattr, uattr_size); if (ret) err = ret; errout_free: btf_verifier_env_free(env); if (btf) btf_free(btf); return ERR_PTR(err); } extern char __weak __start_BTF[]; extern char __weak __stop_BTF[]; extern struct btf *btf_vmlinux; #define BPF_MAP_TYPE(_id, _ops) #define BPF_LINK_TYPE(_id, _name) static union { struct bpf_ctx_convert { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ prog_ctx_type _id##_prog; \ kern_ctx_type _id##_kern; #include <linux/bpf_types.h> #undef BPF_PROG_TYPE } *__t; /* 't' is written once under lock. Read many times. */ const struct btf_type *t; } bpf_ctx_convert; enum { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ __ctx_convert##_id, #include <linux/bpf_types.h> #undef BPF_PROG_TYPE __ctx_convert_unused, /* to avoid empty enum in extreme .config */ }; static u8 bpf_ctx_convert_map[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = __ctx_convert##_id, #include <linux/bpf_types.h> #undef BPF_PROG_TYPE 0, /* avoid empty array */ }; #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) { const struct btf_type *conv_struct; const struct btf_type *ctx_struct; const struct btf_member *ctx_type; const char *tname, *ctx_tname; conv_struct = bpf_ctx_convert.t; if (!conv_struct) { bpf_log(log, "btf_vmlinux is malformed\n"); return NULL; } t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { /* Only pointer to struct is supported for now. * That means that BPF_PROG_TYPE_TRACEPOINT with BTF * is not supported yet. * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. */ return NULL; } tname = btf_name_by_offset(btf, t->name_off); if (!tname) { bpf_log(log, "arg#%d struct doesn't have a name\n", arg); return NULL; } /* prog_type is valid bpf program type. No need for bounds check. */ ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; /* ctx_struct is a pointer to prog_ctx_type in vmlinux. * Like 'struct __sk_buff' */ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type); if (!ctx_struct) /* should not happen */ return NULL; again: ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off); if (!ctx_tname) { /* should not happen */ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); return NULL; } /* only compare that prog's ctx type name is the same as * kernel expects. No need to compare field by field. 
* It's ok for bpf prog to do: * struct __sk_buff {}; * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) return ctx_type; if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) return ctx_type; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to * underlying struct and check name again */ if (!btf_type_is_modifier(ctx_struct)) return NULL; while (btf_type_is_modifier(ctx_struct)) ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type); goto again; } return ctx_type; } static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) { const struct btf_member *prog_ctx_type, *kern_ctx_type; prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg); if (!prog_ctx_type) return -ENOENT; kern_ctx_type = prog_ctx_type + 1; return kern_ctx_type->type; } int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { const struct btf_member *kctx_member; const struct btf_type *conv_struct; const struct btf_type *kctx_type; u32 kctx_type_id; conv_struct = bpf_ctx_convert.t; /* get member for kernel ctx type */ kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; kctx_type_id = kctx_member->type; kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); if (!btf_type_is_struct(kctx_type)) { bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); return -EINVAL; } return kctx_type_id; } BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; int err; env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) return ERR_PTR(-ENOMEM); log = &env->log; log->level = BPF_LOG_KERNEL; btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; goto errout; } env->btf = btf; btf->data = __start_BTF; btf->data_size = __stop_BTF - __start_BTF; btf->kernel_btf = true; snprintf(btf->name, sizeof(btf->name), "vmlinux"); err = btf_parse_hdr(env); if (err) goto errout; btf->nohdr_data = btf->data + btf->hdr.hdr_len; err = btf_parse_str_sec(env); if (err) goto errout; err = btf_check_all_metas(env); if (err) goto errout; err = btf_check_type_tags(env, btf, 1); if (err) goto errout; /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); bpf_struct_ops_init(btf, log); refcount_set(&btf->refcnt, 1); err = btf_alloc_id(btf); if (err) goto errout; btf_verifier_env_free(env); return btf; errout: btf_verifier_env_free(env); if (btf) { kvfree(btf->types); kfree(btf); } return ERR_PTR(err); } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL, *base_btf; int err; base_btf = bpf_get_btf_vmlinux(); if (IS_ERR(base_btf)) return base_btf; if (!base_btf) return ERR_PTR(-EINVAL); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) return ERR_PTR(-ENOMEM); log = &env->log; log->level = BPF_LOG_KERNEL; btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; goto errout; } env->btf = btf; btf->base_btf = base_btf; btf->start_id = base_btf->nr_types; 
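/* Module BTF is split BTF: type IDs continue after the last vmlinux (base) BTF ID and string offsets continue after the base string section, so IDs and offsets below start_id/start_str_off are resolved from base_btf. */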
btf->start_str_off = base_btf->hdr.str_len; btf->kernel_btf = true; snprintf(btf->name, sizeof(btf->name), "%s", module_name); btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); if (!btf->data) { err = -ENOMEM; goto errout; } memcpy(btf->data, data, data_size); btf->data_size = data_size; err = btf_parse_hdr(env); if (err) goto errout; btf->nohdr_data = btf->data + btf->hdr.hdr_len; err = btf_parse_str_sec(env); if (err) goto errout; err = btf_check_all_metas(env); if (err) goto errout; err = btf_check_type_tags(env, btf, btf_nr_types(base_btf)); if (err) goto errout; btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; errout: btf_verifier_env_free(env); if (btf) { kvfree(btf->data); kvfree(btf->types); kfree(btf); } return ERR_PTR(err); } #endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) { struct bpf_prog *tgt_prog = prog->aux->dst_prog; if (tgt_prog) return tgt_prog->aux->btf; else return prog->aux->attach_btf; } static bool is_int_ptr(struct btf *btf, const struct btf_type *t) { /* skip modifiers */ t = btf_type_skip_modifiers(btf, t->type, NULL); return btf_type_is_int(t); } static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off) { const struct btf_param *args; const struct btf_type *t; u32 offset = 0, nr_args; int i; if (!func_proto) return off / 8; nr_args = btf_type_vlen(func_proto); args = (const struct btf_param *)(func_proto + 1); for (i = 0; i < nr_args; i++) { t = btf_type_skip_modifiers(btf, args[i].type, NULL); offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); if (off < offset) return i; } t = btf_type_skip_modifiers(btf, func_proto->type, NULL); offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); if (off < offset) return nr_args; return nr_args + 1; } static bool prog_args_trusted(const struct bpf_prog *prog) { enum bpf_attach_type atype = prog->expected_attach_type; switch (prog->type) { case BPF_PROG_TYPE_TRACING: return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; case BPF_PROG_TYPE_LSM: return bpf_lsm_is_trusted(prog); case BPF_PROG_TYPE_STRUCT_OPS: return true; default: return false; } } bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const struct btf_type *t = prog->aux->attach_func_proto; struct bpf_prog *tgt_prog = prog->aux->dst_prog; struct btf *btf = bpf_prog_get_target_btf(prog); const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; const char *tag_value; u32 nr_args, arg; int i, ret; if (off % 8) { bpf_log(log, "func '%s' offset %d is not multiple of 8\n", tname, off); return false; } arg = get_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments. */ nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS; if (prog->aux->attach_btf_trace) { /* skip first 'void *__data' argument in btf_trace_##name typedef */ args++; nr_args--; } if (arg > nr_args) { bpf_log(log, "func '%s' doesn't have %d-th argument\n", tname, arg + 1); return false; } if (arg == nr_args) { switch (prog->expected_attach_type) { case BPF_LSM_CGROUP: case BPF_LSM_MAC: case BPF_TRACE_FEXIT: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to * int LSM hooks, they use MODIFY_RETURN trampolines. 
* * While the LSM programs are BPF_MODIFY_RETURN-like * the check: * * if (ret_type != 'int') * return -EINVAL; * * is _not_ done here. This is still safe as LSM hooks * have only void and int return types. */ if (!t) return true; t = btf_type_by_id(btf, t->type); break; case BPF_MODIFY_RETURN: /* For now the BPF_MODIFY_RETURN can only be attached to * functions that return an int. */ if (!t) return false; t = btf_type_skip_modifiers(btf, t->type, NULL); if (!btf_type_is_small_int(t)) { bpf_log(log, "ret type %s not allowed for fmod_ret\n", btf_type_str(t)); return false; } break; default: bpf_log(log, "func '%s' doesn't have %d-th argument\n", tname, arg + 1); return false; } } else { if (!t) /* Default prog with MAX_BPF_FUNC_REG_ARGS args */ return true; t = btf_type_by_id(btf, args[arg].type); } /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { bpf_log(log, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, __btf_name_by_offset(btf, t->name_off), btf_type_str(t)); return false; } /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; u32 type, flag; type = base_type(ctx_arg_info->reg_type); flag = type_flag(ctx_arg_info->reg_type); if (ctx_arg_info->offset == off && type == PTR_TO_BUF && (flag & PTR_MAYBE_NULL)) { info->reg_type = ctx_arg_info->reg_type; return true; } } if (t->type == 0) /* This is a pointer to void. * It is the same as scalar from the verifier safety pov. * No further pointer walking is allowed. 
*/ return true; if (is_int_ptr(btf, t)) return true; /* this is a pointer to another type */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; if (ctx_arg_info->offset == off) { if (!ctx_arg_info->btf_id) { bpf_log(log,"invalid btf_id for context argument offset %u\n", off); return false; } info->reg_type = ctx_arg_info->reg_type; info->btf = btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; return true; } } info->reg_type = PTR_TO_BTF_ID; if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED; if (tgt_prog) { enum bpf_prog_type tgt_type; if (tgt_prog->type == BPF_PROG_TYPE_EXT) tgt_type = tgt_prog->aux->saved_dst_prog_type; else tgt_type = tgt_prog->type; ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg); if (ret > 0) { info->btf = btf_vmlinux; info->btf_id = ret; return true; } else { return false; } } info->btf = btf; info->btf_id = t->type; t = btf_type_by_id(btf, t->type); if (btf_type_is_type_tag(t)) { tag_value = __btf_name_by_offset(btf, t->name_off); if (strcmp(tag_value, "user") == 0) info->reg_type |= MEM_USER; if (strcmp(tag_value, "percpu") == 0) info->reg_type |= MEM_PERCPU; } /* skip modifiers */ while (btf_type_is_modifier(t)) { info->btf_id = t->type; t = btf_type_by_id(btf, t->type); } if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", tname, arg, btf_type_str(t)); return false; } bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); return true; } enum bpf_struct_walk_result { /* < 0 error */ WALK_SCALAR = 0, WALK_PTR, WALK_STRUCT, }; static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; again: if (btf_type_is_modifier(t)) t = btf_type_skip_modifiers(btf, t->type, NULL); tname = __btf_name_by_offset(btf, t->name_off); if (!btf_type_is_struct(t)) { bpf_log(log, "Type '%s' is not a struct\n", tname); return -EINVAL; } vlen = btf_type_vlen(t); if (BTF_INFO_KIND(t->info) == BTF_KIND_UNION && vlen != 1 && !(*flag & PTR_UNTRUSTED)) /* * walking unions yields untrusted pointers * with exception of __bpf_md_ptr and other * unions with a single member */ *flag |= PTR_UNTRUSTED; if (off + size > t->size) { /* If the last element is a variable size array, we may * need to relax the rule. 
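 *
 * For example (illustrative struct, not taken from this file):
 *
 *	struct foo {
 *		int cnt;
 *		struct bar elems[];
 *	};
 *
 * An access into elems[] lands beyond t->size even though it can be
 * legitimate, so the code below allows it when the last member is a
 * zero-element array whose element type is an int or a struct.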
*/ struct btf_array *array_elem; if (vlen == 0) goto error; member = btf_type_member(t) + vlen - 1; mtype = btf_type_skip_modifiers(btf, member->type, NULL); if (!btf_type_is_array(mtype)) goto error; array_elem = (struct btf_array *)(mtype + 1); if (array_elem->nelems != 0) goto error; moff = __btf_member_bit_offset(t, member) / 8; if (off < moff) goto error; /* allow structure and integer */ t = btf_type_skip_modifiers(btf, array_elem->type, NULL); if (btf_type_is_int(t)) return WALK_SCALAR; if (!btf_type_is_struct(t)) goto error; off = (off - moff) % t->size; goto again; error: bpf_log(log, "access beyond struct %s at off %u size %u\n", tname, off, size); return -EACCES; } for_each_member(i, t, member) { /* offset of the field in bytes */ moff = __btf_member_bit_offset(t, member) / 8; if (off + size <= moff) /* won't find anything, field is already too far */ break; if (__btf_member_bitfield_size(t, member)) { u32 end_bit = __btf_member_bit_offset(t, member) + __btf_member_bitfield_size(t, member); /* off <= moff instead of off == moff because clang * does not generate a BTF member for anonymous * bitfield like the ":16" here: * struct { * int :16; * int x:8; * }; */ if (off <= moff && BITS_ROUNDUP_BYTES(end_bit) <= off + size) return WALK_SCALAR; /* off may be accessing a following member * * or * * Doing partial access at either end of this * bitfield. Continue on this case also to * treat it as not accessing this bitfield * and eventually error out as field not * found to keep it simple. * It could be relaxed if there was a legit * partial access case later. */ continue; } /* In case of "off" is pointing to holes of a struct */ if (off < moff) break; /* type of the field */ mid = member->type; mtype = btf_type_by_id(btf, member->type); mname = __btf_name_by_offset(btf, member->name_off); mtype = __btf_resolve_size(btf, mtype, &msize, &elem_type, &elem_id, &total_nelems, &mid); if (IS_ERR(mtype)) { bpf_log(log, "field %s doesn't have size\n", mname); return -EFAULT; } mtrue_end = moff + msize; if (off >= mtrue_end) /* no overlap with member, keep iterating */ continue; if (btf_type_is_array(mtype)) { u32 elem_idx; /* __btf_resolve_size() above helps to * linearize a multi-dimensional array. * * The logic here is treating an array * in a struct as the following way: * * struct outer { * struct inner array[2][2]; * }; * * looks like: * * struct outer { * struct inner array_elem0; * struct inner array_elem1; * struct inner array_elem2; * struct inner array_elem3; * }; * * When accessing outer->array[1][0], it moves * moff to "array_elem2", set mtype to * "struct inner", and msize also becomes * sizeof(struct inner). Then most of the * remaining logic will fall through without * caring the current member is an array or * not. * * Unlike mtype/msize/moff, mtrue_end does not * change. The naming difference ("_true") tells * that it is not always corresponding to * the current mtype/msize/moff. * It is the true end of the current * member (i.e. array in this case). That * will allow an int array to be accessed like * a scratch space, * i.e. allow access beyond the size of * the array's element as long as it is * within the mtrue_end boundary. 
*/ /* skip empty array */ if (moff == mtrue_end) continue; msize /= total_nelems; elem_idx = (off - moff) / msize; moff += elem_idx * msize; mtype = elem_type; mid = elem_id; } /* the 'off' we're looking for is either equal to start * of this field or inside of this struct */ if (btf_type_is_struct(mtype)) { /* our field must be inside that union or struct */ t = mtype; /* return if the offset matches the member offset */ if (off == moff) { *next_btf_id = mid; return WALK_STRUCT; } /* adjust offset we're looking for */ off -= moff; goto again; } if (btf_type_is_ptr(mtype)) { const struct btf_type *stype, *t; enum bpf_type_flag tmp_flag = 0; u32 id; if (msize != size || off != moff) { bpf_log(log, "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n", mname, moff, tname, off, size); return -EACCES; } /* check type tag */ t = btf_type_by_id(btf, mtype->type); if (btf_type_is_type_tag(t)) { tag_value = __btf_name_by_offset(btf, t->name_off); /* check __user tag */ if (strcmp(tag_value, "user") == 0) tmp_flag = MEM_USER; /* check __percpu tag */ if (strcmp(tag_value, "percpu") == 0) tmp_flag = MEM_PERCPU; /* check __rcu tag */ if (strcmp(tag_value, "rcu") == 0) tmp_flag = MEM_RCU; } stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; *flag |= tmp_flag; if (field_name) *field_name = mname; return WALK_PTR; } } /* Allow more flexible access within an int as long as * it is within mtrue_end. * Since mtrue_end could be the end of an array, * that also allows using an array of int as a scratch * space. e.g. skb->cb[]. */ if (off + size > mtrue_end && !(*flag & PTR_UNTRUSTED)) { bpf_log(log, "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n", mname, mtrue_end, tname, off, size); return -EACCES; } return WALK_SCALAR; } bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); return -EINVAL; } int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype __maybe_unused, u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name) { const struct btf *btf = reg->btf; enum bpf_type_flag tmp_flag = 0; const struct btf_type *t; u32 id = reg->btf_id; int err; while (type_is_alloc(reg->type)) { struct btf_struct_meta *meta; struct btf_record *rec; int i; meta = btf_find_struct_meta(btf, id); if (!meta) break; rec = meta->record; for (i = 0; i < rec->cnt; i++) { struct btf_field *field = &rec->fields[i]; u32 offset = field->offset; if (off < offset + btf_field_type_size(field->type) && offset < off + size) { bpf_log(log, "direct access to %s is disallowed\n", btf_field_type_name(field->type)); return -EACCES; } } break; } t = btf_type_by_id(btf, id); do { err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag, field_name); switch (err) { case WALK_PTR: /* For local types, the destination register cannot * become a pointer again. */ if (type_is_alloc(reg->type)) return SCALAR_VALUE; /* If we found the pointer or scalar on t+off, * we're done. */ *next_btf_id = id; *flag = tmp_flag; return PTR_TO_BTF_ID; case WALK_SCALAR: return SCALAR_VALUE; case WALK_STRUCT: /* We found nested struct, so continue the search * by diving in it. At this point the offset is * aligned with the new type, so set it to 0. */ t = btf_type_by_id(btf, id); off = 0; break; default: /* It's either error or unknown return value.. * scream and leave. 
*/ if (WARN_ONCE(err > 0, "unknown btf_struct_walk return value")) return -EINVAL; return err; } } while (t); return -EINVAL; } /* Check that two BTF types, each specified as an BTF object + id, are exactly * the same. Trivial ID check is not enough due to module BTFs, because we can * end up with two different module BTFs, but IDs point to the common type in * vmlinux BTF. */ bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2) { if (id1 != id2) return false; if (btf1 == btf2) return true; return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2); } bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id, bool strict) { const struct btf_type *type; enum bpf_type_flag flag = 0; int err; /* Are we already done? */ if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id)) return true; /* In case of strict type match, we do not walk struct, the top level * type match must succeed. When strict is true, off should have already * been 0. */ if (strict) return false; again: type = btf_type_by_id(btf, id); if (!type) return false; err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL); if (err != WALK_STRUCT) return false; /* We found nested struct object. If it matches * the requested ID, we're done. Otherwise let's * continue the search with offset 0 in the new * type. */ if (!btf_types_are_same(btf, id, need_btf, need_type_id)) { off = 0; goto again; } return true; } static int __get_type_size(struct btf *btf, u32 btf_id, const struct btf_type **ret_type) { const struct btf_type *t; *ret_type = btf_type_by_id(btf, 0); if (!btf_id) /* void */ return 0; t = btf_type_by_id(btf, btf_id); while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!t) return -EINVAL; *ret_type = t; if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) return t->size; return -EINVAL; } static u8 __get_type_fmodel_flags(const struct btf_type *t) { u8 flags = 0; if (__btf_type_is_struct(t)) flags |= BTF_FMODEL_STRUCT_ARG; if (btf_type_is_signed_int(t)) flags |= BTF_FMODEL_SIGNED_ARG; return flags; } int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *func, const char *tname, struct btf_func_model *m) { const struct btf_param *args; const struct btf_type *t; u32 i, nargs; int ret; if (!func) { /* BTF function prototype doesn't match the verifier types. * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args. */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { m->arg_size[i] = 8; m->arg_flags[i] = 0; } m->ret_size = 8; m->ret_flags = 0; m->nr_args = MAX_BPF_FUNC_REG_ARGS; return 0; } args = (const struct btf_param *)(func + 1); nargs = btf_type_vlen(func); if (nargs > MAX_BPF_FUNC_ARGS) { bpf_log(log, "The function %s has %d arguments. 
Too many.\n", tname, nargs); return -EINVAL; } ret = __get_type_size(btf, func->type, &t); if (ret < 0 || __btf_type_is_struct(t)) { bpf_log(log, "The function %s return type %s is unsupported.\n", tname, btf_type_str(t)); return -EINVAL; } m->ret_size = ret; m->ret_flags = __get_type_fmodel_flags(t); for (i = 0; i < nargs; i++) { if (i == nargs - 1 && args[i].type == 0) { bpf_log(log, "The function %s with variable args is unsupported.\n", tname); return -EINVAL; } ret = __get_type_size(btf, args[i].type, &t); /* No support of struct argument size greater than 16 bytes */ if (ret < 0 || ret > 16) { bpf_log(log, "The function %s arg%d type %s is unsupported.\n", tname, i, btf_type_str(t)); return -EINVAL; } if (ret == 0) { bpf_log(log, "The function %s has malformed void argument.\n", tname); return -EINVAL; } m->arg_size[i] = ret; m->arg_flags[i] = __get_type_fmodel_flags(t); } m->nr_args = nargs; return 0; } /* Compare BTFs of two functions assuming only scalars and pointers to context. * t1 points to BTF_KIND_FUNC in btf1 * t2 points to BTF_KIND_FUNC in btf2 * Returns: * EINVAL - function prototype mismatch * EFAULT - verifier bug * 0 - 99% match. The last 1% is validated by the verifier. */ static int btf_check_func_type_match(struct bpf_verifier_log *log, struct btf *btf1, const struct btf_type *t1, struct btf *btf2, const struct btf_type *t2) { const struct btf_param *args1, *args2; const char *fn1, *fn2, *s1, *s2; u32 nargs1, nargs2, i; fn1 = btf_name_by_offset(btf1, t1->name_off); fn2 = btf_name_by_offset(btf2, t2->name_off); if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) { bpf_log(log, "%s() is not a global function\n", fn1); return -EINVAL; } if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) { bpf_log(log, "%s() is not a global function\n", fn2); return -EINVAL; } t1 = btf_type_by_id(btf1, t1->type); if (!t1 || !btf_type_is_func_proto(t1)) return -EFAULT; t2 = btf_type_by_id(btf2, t2->type); if (!t2 || !btf_type_is_func_proto(t2)) return -EFAULT; args1 = (const struct btf_param *)(t1 + 1); nargs1 = btf_type_vlen(t1); args2 = (const struct btf_param *)(t2 + 1); nargs2 = btf_type_vlen(t2); if (nargs1 != nargs2) { bpf_log(log, "%s() has %d args while %s() has %d args\n", fn1, nargs1, fn2, nargs2); return -EINVAL; } t1 = btf_type_skip_modifiers(btf1, t1->type, NULL); t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); if (t1->info != t2->info) { bpf_log(log, "Return type %s of %s() doesn't match type %s of %s()\n", btf_type_str(t1), fn1, btf_type_str(t2), fn2); return -EINVAL; } for (i = 0; i < nargs1; i++) { t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL); t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL); if (t1->info != t2->info) { bpf_log(log, "arg%d in %s() is %s while %s() has %s\n", i, fn1, btf_type_str(t1), fn2, btf_type_str(t2)); return -EINVAL; } if (btf_type_has_size(t1) && t1->size != t2->size) { bpf_log(log, "arg%d in %s() has size %d while %s() has %d\n", i, fn1, t1->size, fn2, t2->size); return -EINVAL; } /* global functions are validated with scalars and pointers * to context only. And only global functions can be replaced. * Hence type check only those types. 
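 *
 * Illustrative example only (hypothetical function, not taken from this
 * file):
 *
 *	int __noinline my_global_func(struct __sk_buff *skb, int len);
 *
 * Only the scalar 'len' and the pointer-to-context 'skb' are compared
 * below; any other pointer argument type is rejected.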
*/ if (btf_type_is_int(t1) || btf_is_any_enum(t1)) continue; if (!btf_type_is_ptr(t1)) { bpf_log(log, "arg%d in %s() has unrecognized type\n", i, fn1); return -EINVAL; } t1 = btf_type_skip_modifiers(btf1, t1->type, NULL); t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); if (!btf_type_is_struct(t1)) { bpf_log(log, "arg%d in %s() is not a pointer to context\n", i, fn1); return -EINVAL; } if (!btf_type_is_struct(t2)) { bpf_log(log, "arg%d in %s() is not a pointer to context\n", i, fn2); return -EINVAL; } /* This is an optional check to make program writing easier. * Compare names of structs and report an error to the user. * btf_prepare_func_args() already checked that t2 struct * is a context type. btf_prepare_func_args() will check * later that t1 struct is a context type as well. */ s1 = btf_name_by_offset(btf1, t1->name_off); s2 = btf_name_by_offset(btf2, t2->name_off); if (strcmp(s1, s2)) { bpf_log(log, "arg%d %s(struct %s *) doesn't match %s(struct %s *)\n", i, fn1, s1, fn2, s2); return -EINVAL; } } return 0; } /* Compare BTFs of given program with BTF of target program */ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf2, const struct btf_type *t2) { struct btf *btf1 = prog->aux->btf; const struct btf_type *t1; u32 btf_id = 0; if (!prog->aux->func_info) { bpf_log(log, "Program extension requires BTF\n"); return -EINVAL; } btf_id = prog->aux->func_info[0].type_id; if (!btf_id) return -EFAULT; t1 = btf_type_by_id(btf1, btf_id); if (!t1 || !btf_type_is_func(t1)) return -EFAULT; return btf_check_func_type_match(log, btf1, t1, btf2, t2); } static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok, bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_verifier_log *log = &env->log; const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; u32 i, nargs, ref_id; int ret; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { /* These checks were already done by the verifier while loading * struct bpf_func_info or in add_kfunc_call(). */ bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", func_id); return -EFAULT; } func_name = btf_name_by_offset(btf, t->name_off); t = btf_type_by_id(btf, t->type); if (!t || !btf_type_is_func_proto(t)) { bpf_log(log, "Invalid BTF of func %s\n", func_name); return -EFAULT; } args = (const struct btf_param *)(t + 1); nargs = btf_type_vlen(t); if (nargs > MAX_BPF_FUNC_REG_ARGS) { bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } /* check that BTF function arguments match actual types that the * verifier sees. */ for (i = 0; i < nargs; i++) { enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { if (reg->type == SCALAR_VALUE) continue; bpf_log(log, "R%d is not a scalar\n", regno); return -EINVAL; } if (!btf_type_is_ptr(t)) { bpf_log(log, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); return -EINVAL; } ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) return ret; if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. 
*/ if (reg->type != PTR_TO_CTX) { bpf_log(log, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); return -EINVAL; } } else if (ptr_to_mem_ok && processing_call) { const struct btf_type *resolve_ret; u32 type_size; resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); return -EINVAL; } if (check_mem_reg(env, reg, regno, type_size)) return -EINVAL; } else { bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, func_name, func_id); return -EINVAL; } } return 0; } /* Compare BTF of a function declaration with given bpf_reg_state. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - there is a type mismatch or BTF is not available. * 0 - BTF matches with what bpf_reg_state expects. * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. */ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs) { struct bpf_prog *prog = env->prog; struct btf *btf = prog->aux->btf; bool is_global; u32 btf_id; int err; if (!prog->aux->func_info) return -EINVAL; btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) return -EFAULT; if (prog->aux->func_info_aux[subprog].unreliable) return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. * In such cases mark the function as unreliable from BTF point of view. */ if (err) prog->aux->func_info_aux[subprog].unreliable = true; return err; } /* Compare BTF of a function call with given bpf_reg_state. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - there is a type mismatch or BTF is not available. * 0 - BTF matches with what bpf_reg_state expects. * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. * * NOTE: the code is duplicated from btf_check_subprog_arg_match() * because btf_check_func_arg_match() is still doing both. Once that * function is split in 2, we can call from here btf_check_subprog_arg_match() * first, and then treat the calling part in a new code path. */ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs) { struct bpf_prog *prog = env->prog; struct btf *btf = prog->aux->btf; bool is_global; u32 btf_id; int err; if (!prog->aux->func_info) return -EINVAL; btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) return -EFAULT; if (prog->aux->func_info_aux[subprog].unreliable) return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. * In such cases mark the function as unreliable from BTF point of view. */ if (err) prog->aux->func_info_aux[subprog].unreliable = true; return err; } /* Convert BTF of a function into bpf_reg_state if possible * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - cannot convert BTF. * 0 - Successfully converted BTF into bpf_reg_state * (either PTR_TO_CTX or SCALAR_VALUE). 
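 * Illustrative example (hypothetical subprog, not from this file): for a
 * global subprog 'int sched_handler(struct __sk_buff *skb, __u32 budget)' in a
 * sched_cls program, R1 becomes PTR_TO_CTX (struct __sk_buff is that program
 * type's context) and R2 becomes SCALAR_VALUE; other pointer args are sized
 * via btf_resolve_size() and become PTR_TO_MEM | PTR_MAYBE_NULL.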
*/ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs, bool is_ex_cb) { struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; enum bpf_prog_type prog_type = prog->type; struct btf *btf = prog->aux->btf; const struct btf_param *args; const struct btf_type *t, *ref_t; u32 i, nargs, btf_id; const char *tname; if (!prog->aux->func_info || prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) { bpf_log(log, "Verifier bug\n"); return -EFAULT; } btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) { bpf_log(log, "Global functions need valid BTF\n"); return -EFAULT; } t = btf_type_by_id(btf, btf_id); if (!t || !btf_type_is_func(t)) { /* These checks were already done by the verifier while loading * struct bpf_func_info */ bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", subprog); return -EFAULT; } tname = btf_name_by_offset(btf, t->name_off); if (log->level & BPF_LOG_LEVEL) bpf_log(log, "Validating %s() func#%d...\n", tname, subprog); if (prog->aux->func_info_aux[subprog].unreliable) { bpf_log(log, "Verifier bug in function %s()\n", tname); return -EFAULT; } if (prog_type == BPF_PROG_TYPE_EXT) prog_type = prog->aux->dst_prog->type; t = btf_type_by_id(btf, t->type); if (!t || !btf_type_is_func_proto(t)) { bpf_log(log, "Invalid type of function %s()\n", tname); return -EFAULT; } args = (const struct btf_param *)(t + 1); nargs = btf_type_vlen(t); if (nargs > MAX_BPF_FUNC_REG_ARGS) { bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n", tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } /* check that function returns int, exception cb also requires this */ t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { bpf_log(log, "Global function %s() doesn't return scalar. Only those are supported.\n", tname); return -EINVAL; } /* Convert BTF function arguments into verifier types. * Only PTR_TO_CTX and SCALAR are supported atm. */ for (i = 0; i < nargs; i++) { struct bpf_reg_state *reg = ®s[i + 1]; t = btf_type_by_id(btf, args[i].type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (btf_type_is_int(t) || btf_is_any_enum(t)) { reg->type = SCALAR_VALUE; continue; } if (btf_type_is_ptr(t)) { if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { reg->type = PTR_TO_CTX; continue; } t = btf_type_skip_modifiers(btf, t->type, NULL); ref_t = btf_resolve_size(btf, t, ®->mem_size); if (IS_ERR(ref_t)) { bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", i, btf_type_str(t), btf_name_by_offset(btf, t->name_off), PTR_ERR(ref_t)); return -EINVAL; } reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; reg->id = ++env->id_gen; continue; } bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", i, btf_type_str(t), tname); return -EINVAL; } /* We have already ensured that the callback returns an integer, just * like all global subprogs. We need to determine it only has a single * scalar argument. 
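 * E.g. (illustrative signature only) an exception callback of the form
 * 'int my_exc_cb(u64 cookie)' satisfies this; more than one argument or a
 * non-scalar argument is rejected right below.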
*/ if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) { bpf_log(log, "exception cb only supports single integer argument\n"); return -EINVAL; } return 0; } static void btf_type_show(const struct btf *btf, u32 type_id, void *obj, struct btf_show *show) { const struct btf_type *t = btf_type_by_id(btf, type_id); show->btf = btf; memset(&show->state, 0, sizeof(show->state)); memset(&show->obj, 0, sizeof(show->obj)); btf_type_ops(t)->show(btf, t, type_id, obj, 0, show); } static void btf_seq_show(struct btf_show *show, const char *fmt, va_list args) { seq_vprintf((struct seq_file *)show->target, fmt, args); } int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m, u64 flags) { struct btf_show sseq; sseq.target = m; sseq.showfn = btf_seq_show; sseq.flags = flags; btf_type_show(btf, type_id, obj, &sseq); return sseq.state.status; } void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { (void) btf_type_seq_show_flags(btf, type_id, obj, m, BTF_SHOW_NONAME | BTF_SHOW_COMPACT | BTF_SHOW_ZERO | BTF_SHOW_UNSAFE); } struct btf_show_snprintf { struct btf_show show; int len_left; /* space left in string */ int len; /* length we would have written */ }; static void btf_snprintf_show(struct btf_show *show, const char *fmt, va_list args) { struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show; int len; len = vsnprintf(show->target, ssnprintf->len_left, fmt, args); if (len < 0) { ssnprintf->len_left = 0; ssnprintf->len = len; } else if (len >= ssnprintf->len_left) { /* no space, drive on to get length we would have written */ ssnprintf->len_left = 0; ssnprintf->len += len; } else { ssnprintf->len_left -= len; ssnprintf->len += len; show->target += len; } } int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, char *buf, int len, u64 flags) { struct btf_show_snprintf ssnprintf; ssnprintf.show.target = buf; ssnprintf.show.flags = flags; ssnprintf.show.showfn = btf_snprintf_show; ssnprintf.len_left = len; ssnprintf.len = 0; btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf); /* If we encountered an error, return it. */ if (ssnprintf.show.state.status) return ssnprintf.show.state.status; /* Otherwise return length we would have written */ return ssnprintf.len; } #ifdef CONFIG_PROC_FS static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) { const struct btf *btf = filp->private_data; seq_printf(m, "btf_id:\t%u\n", btf->id); } #endif static int btf_release(struct inode *inode, struct file *filp) { btf_put(filp->private_data); return 0; } const struct file_operations btf_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_btf_show_fdinfo, #endif .release = btf_release, }; static int __btf_new_fd(struct btf *btf) { return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { struct btf *btf; int ret; btf = btf_parse(attr, uattr, uattr_size); if (IS_ERR(btf)) return PTR_ERR(btf); ret = btf_alloc_id(btf); if (ret) { btf_free(btf); return ret; } /* * The BTF ID is published to the userspace. * All BTF free must go through call_rcu() from * now on (i.e. free by calling btf_put()). 
*/ ret = __btf_new_fd(btf); if (ret < 0) btf_put(btf); return ret; } struct btf *btf_get_by_fd(int fd) { struct btf *btf; struct fd f; f = fdget(fd); if (!f.file) return ERR_PTR(-EBADF); if (f.file->f_op != &btf_fops) { fdput(f); return ERR_PTR(-EINVAL); } btf = f.file->private_data; refcount_inc(&btf->refcnt); fdput(f); return btf; } int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, union bpf_attr __user *uattr) { struct bpf_btf_info __user *uinfo; struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; char __user *uname; u32 uinfo_len, uname_len, name_len; int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; info_copy = min_t(u32, uinfo_len, sizeof(info)); memset(&info, 0, sizeof(info)); if (copy_from_user(&info, uinfo, info_copy)) return -EFAULT; info.id = btf->id; ubtf = u64_to_user_ptr(info.btf); btf_copy = min_t(u32, btf->data_size, info.btf_size); if (copy_to_user(ubtf, btf->data, btf_copy)) return -EFAULT; info.btf_size = btf->data_size; info.kernel_btf = btf->kernel_btf; uname = u64_to_user_ptr(info.name); uname_len = info.name_len; if (!uname ^ !uname_len) return -EINVAL; name_len = strlen(btf->name); info.name_len = name_len; if (uname) { if (uname_len >= name_len + 1) { if (copy_to_user(uname, btf->name, name_len + 1)) return -EFAULT; } else { char zero = '\0'; if (copy_to_user(uname, btf->name, uname_len - 1)) return -EFAULT; if (put_user(zero, uname + uname_len - 1)) return -EFAULT; /* let user-space know about too short buffer */ ret = -ENOSPC; } } if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; return ret; } int btf_get_fd_by_id(u32 id) { struct btf *btf; int fd; rcu_read_lock(); btf = idr_find(&btf_idr, id); if (!btf || !refcount_inc_not_zero(&btf->refcnt)) btf = ERR_PTR(-ENOENT); rcu_read_unlock(); if (IS_ERR(btf)) return PTR_ERR(btf); fd = __btf_new_fd(btf); if (fd < 0) btf_put(btf); return fd; } u32 btf_obj_id(const struct btf *btf) { return btf->id; } bool btf_is_kernel(const struct btf *btf) { return btf->kernel_btf; } bool btf_is_module(const struct btf *btf) { return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0; } enum { BTF_MODULE_F_LIVE = (1 << 0), }; #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; int flags; }; static LIST_HEAD(btf_modules); static DEFINE_MUTEX(btf_module_mutex); static ssize_t btf_module_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) { const struct btf *btf = bin_attr->private; memcpy(buf, btf->data + off, len); return len; } static void purge_cand_cache(struct btf *btf); static int btf_module_notify(struct notifier_block *nb, unsigned long op, void *module) { struct btf_module *btf_mod, *tmp; struct module *mod = module; struct btf *btf; int err = 0; if (mod->btf_data_size == 0 || (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && op != MODULE_STATE_GOING)) goto out; switch (op) { case MODULE_STATE_COMING: btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL); if (!btf_mod) { err = -ENOMEM; goto out; } btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size); if (IS_ERR(btf)) { kfree(btf_mod); if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) { pr_warn("failed to validate module [%s] BTF: %ld\n", mod->name, PTR_ERR(btf)); err = PTR_ERR(btf); } else { pr_warn_once("Kernel module BTF mismatch detected, BTF debug info may be 
unavailable for some modules\n"); } goto out; } err = btf_alloc_id(btf); if (err) { btf_free(btf); kfree(btf_mod); goto out; } purge_cand_cache(NULL); mutex_lock(&btf_module_mutex); btf_mod->module = module; btf_mod->btf = btf; list_add(&btf_mod->list, &btf_modules); mutex_unlock(&btf_module_mutex); if (IS_ENABLED(CONFIG_SYSFS)) { struct bin_attribute *attr; attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) goto out; sysfs_bin_attr_init(attr); attr->attr.name = btf->name; attr->attr.mode = 0444; attr->size = btf->data_size; attr->private = btf; attr->read = btf_module_read; err = sysfs_create_bin_file(btf_kobj, attr); if (err) { pr_warn("failed to register module [%s] BTF in sysfs: %d\n", mod->name, err); kfree(attr); err = 0; goto out; } btf_mod->sysfs_attr = attr; } break; case MODULE_STATE_LIVE: mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { if (btf_mod->module != module) continue; btf_mod->flags |= BTF_MODULE_F_LIVE; break; } mutex_unlock(&btf_module_mutex); break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { if (btf_mod->module != module) continue; list_del(&btf_mod->list); if (btf_mod->sysfs_attr) sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); purge_cand_cache(btf_mod->btf); btf_put(btf_mod->btf); kfree(btf_mod->sysfs_attr); kfree(btf_mod); break; } mutex_unlock(&btf_module_mutex); break; } out: return notifier_from_errno(err); } static struct notifier_block btf_module_nb = { .notifier_call = btf_module_notify, }; static int __init btf_module_init(void) { register_module_notifier(&btf_module_nb); return 0; } fs_initcall(btf_module_init); #endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ struct module *btf_try_get_module(const struct btf *btf) { struct module *res = NULL; #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module *btf_mod, *tmp; mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { if (btf_mod->btf != btf) continue; /* We must only consider module whose __init routine has * finished, hence we must check for BTF_MODULE_F_LIVE flag, * which is set from the notifier callback for * MODULE_STATE_LIVE. */ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; } mutex_unlock(&btf_module_mutex); #endif return res; } /* Returns struct btf corresponding to the struct module. * This function can return NULL or ERR_PTR. 
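 * The reference returned here is taken with btf_get() and must be released
 * with btf_put(). A typical caller follows the shape of
 * __register_btf_kfunc_id_set() below (sketch only):
 *
 *	btf = btf_get_module_btf(owner);
 *	if (!btf)
 *		return 0 or -ENOENT, depending on whether BTF is expected;
 *	if (IS_ERR(btf))
 *		return PTR_ERR(btf);
 *	... use btf ...
 *	btf_put(btf);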
*/ static struct btf *btf_get_module_btf(const struct module *module) { #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module *btf_mod, *tmp; #endif struct btf *btf = NULL; if (!module) { btf = bpf_get_btf_vmlinux(); if (!IS_ERR_OR_NULL(btf)) btf_get(btf); return btf; } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { if (btf_mod->module != module) continue; btf_get(btf_mod->btf); btf = btf_mod->btf; break; } mutex_unlock(&btf_module_mutex); #endif return btf; } BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) { struct btf *btf = NULL; int btf_obj_fd = 0; long ret; if (flags) return -EINVAL; if (name_sz <= 1 || name[name_sz - 1]) return -EINVAL; ret = bpf_find_btf_id(name, kind, &btf); if (ret > 0 && btf_is_module(btf)) { btf_obj_fd = __btf_new_fd(btf); if (btf_obj_fd < 0) { btf_put(btf); return btf_obj_fd; } return ret | (((u64)btf_obj_fd) << 32); } if (ret > 0) btf_put(btf); return ret; } const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { .func = bpf_btf_find_by_name_kind, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) #define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, const struct btf_type *func, u32 func_flags) { u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); const char *name, *sfx, *iter_name; const struct btf_param *arg; const struct btf_type *t; char exp_name[128]; u32 nr_args; /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ if (!flags || (flags & (flags - 1))) return -EINVAL; /* any BPF iter kfunc should have `struct bpf_iter_<type> *` first arg */ nr_args = btf_type_vlen(func); if (nr_args < 1) return -EINVAL; arg = &btf_params(func)[0]; t = btf_type_skip_modifiers(btf, arg->type, NULL); if (!t || !btf_type_is_ptr(t)) return -EINVAL; t = btf_type_skip_modifiers(btf, t->type, NULL); if (!t || !__btf_type_is_struct(t)) return -EINVAL; name = btf_name_by_offset(btf, t->name_off); if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) return -EINVAL; /* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to * fit nicely in stack slots */ if (t->size == 0 || (t->size % 8)) return -EINVAL; /* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *) * naming pattern */ iter_name = name + sizeof(ITER_PREFIX) - 1; if (flags & KF_ITER_NEW) sfx = "new"; else if (flags & KF_ITER_NEXT) sfx = "next"; else /* (flags & KF_ITER_DESTROY) */ sfx = "destroy"; snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx); if (strcmp(func_name, exp_name)) return -EINVAL; /* only iter constructor should have extra arguments */ if (!(flags & KF_ITER_NEW) && nr_args != 1) return -EINVAL; if (flags & KF_ITER_NEXT) { /* bpf_iter_<type>_next() should return pointer */ t = btf_type_skip_modifiers(btf, func->type, NULL); if (!t || !btf_type_is_ptr(t)) return -EINVAL; } if (flags & KF_ITER_DESTROY) { /* bpf_iter_<type>_destroy() should return void */ t = btf_type_by_id(btf, func->type); if (!t || !btf_type_is_void(t)) return -EINVAL; } return 0; } static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags) { const struct btf_type *func; const char *func_name; int err; /* any kfunc 
should be FUNC -> FUNC_PROTO */ func = btf_type_by_id(btf, func_id); if (!func || !btf_type_is_func(func)) return -EINVAL; /* sanity check kfunc name */ func_name = btf_name_by_offset(btf, func->name_off); if (!func_name || !func_name[0]) return -EINVAL; func = btf_type_by_id(btf, func->type); if (!func || !btf_type_is_func_proto(func)) return -EINVAL; if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) { err = btf_check_iter_kfuncs(btf, func_name, func, func_flags); if (err) return err; } return 0; } /* Kernel Function (kfunc) BTF ID set registration API */ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, const struct btf_kfunc_id_set *kset) { struct btf_kfunc_hook_filter *hook_filter; struct btf_id_set8 *add_set = kset->set; bool vmlinux_set = !btf_is_module(btf); bool add_filter = !!kset->filter; struct btf_kfunc_set_tab *tab; struct btf_id_set8 *set; u32 set_cnt; int ret; if (hook >= BTF_KFUNC_HOOK_MAX) { ret = -EINVAL; goto end; } if (!add_set->cnt) return 0; tab = btf->kfunc_set_tab; if (tab && add_filter) { u32 i; hook_filter = &tab->hook_filters[hook]; for (i = 0; i < hook_filter->nr_filters; i++) { if (hook_filter->filters[i] == kset->filter) { add_filter = false; break; } } if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) { ret = -E2BIG; goto end; } } if (!tab) { tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); if (!tab) return -ENOMEM; btf->kfunc_set_tab = tab; } set = tab->sets[hook]; /* Warn when register_btf_kfunc_id_set is called twice for the same hook * for module sets. */ if (WARN_ON_ONCE(set && !vmlinux_set)) { ret = -EINVAL; goto end; } /* We don't need to allocate, concatenate, and sort module sets, because * only one is allowed per hook. Hence, we can directly assign the * pointer and return. */ if (!vmlinux_set) { tab->sets[hook] = add_set; goto do_add_filter; } /* In case of vmlinux sets, there may be more than one set being * registered per hook. To create a unified set, we allocate a new set * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary * searching the set using btf_id_set8_contains function work. */ set_cnt = set ? 
set->cnt : 0; if (set_cnt > U32_MAX - add_set->cnt) { ret = -EOVERFLOW; goto end; } if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { ret = -E2BIG; goto end; } /* Grow set */ set = krealloc(tab->sets[hook], offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), GFP_KERNEL | __GFP_NOWARN); if (!set) { ret = -ENOMEM; goto end; } /* For newly allocated set, initialize set->cnt to 0 */ if (!tab->sets[hook]) set->cnt = 0; tab->sets[hook] = set; /* Concatenate the two sets */ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); set->cnt += add_set->cnt; sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); do_add_filter: if (add_filter) { hook_filter = &tab->hook_filters[hook]; hook_filter->filters[hook_filter->nr_filters++] = kset->filter; } return 0; end: btf_free_kfunc_set_tab(btf); return ret; } static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_hook hook, u32 kfunc_btf_id, const struct bpf_prog *prog) { struct btf_kfunc_hook_filter *hook_filter; struct btf_id_set8 *set; u32 *id, i; if (hook >= BTF_KFUNC_HOOK_MAX) return NULL; if (!btf->kfunc_set_tab) return NULL; hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; for (i = 0; i < hook_filter->nr_filters; i++) { if (hook_filter->filters[i](prog, kfunc_btf_id)) return NULL; } set = btf->kfunc_set_tab->sets[hook]; if (!set) return NULL; id = btf_id_set8_contains(set, kfunc_btf_id); if (!id) return NULL; /* The flags for BTF ID are located next to it */ return id + 1; } static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) { switch (prog_type) { case BPF_PROG_TYPE_UNSPEC: return BTF_KFUNC_HOOK_COMMON; case BPF_PROG_TYPE_XDP: return BTF_KFUNC_HOOK_XDP; case BPF_PROG_TYPE_SCHED_CLS: return BTF_KFUNC_HOOK_TC; case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: return BTF_KFUNC_HOOK_SK_SKB; case BPF_PROG_TYPE_SOCKET_FILTER: return BTF_KFUNC_HOOK_SOCKET_FILTER; case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: return BTF_KFUNC_HOOK_LWT; case BPF_PROG_TYPE_NETFILTER: return BTF_KFUNC_HOOK_NETFILTER; default: return BTF_KFUNC_HOOK_MAX; } } /* Caution: * Reference to the module (obtained using btf_try_get_module) corresponding to * the struct btf *MUST* be held when calling this function from verifier * context. This is usually true as we stash references in prog's kfunc_btf_tab; * keeping the reference for the duration of the call provides the necessary * protection for looking up a well-formed btf->kfunc_set_tab. 
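 * The sets consulted by this lookup are registered from initcall or module
 * init context via register_btf_kfunc_id_set(). Registration sketch
 * (hypothetical kfunc name, BTF_SET8 macro spelling assumed):
 *
 *	BTF_SET8_START(my_kfunc_ids)
 *	BTF_ID_FLAGS(func, bpf_my_kfunc, KF_TRUSTED_ARGS)
 *	BTF_SET8_END(my_kfunc_ids)
 *
 *	static const struct btf_kfunc_id_set my_kfunc_set = {
 *		.owner = THIS_MODULE,
 *		.set   = &my_kfunc_ids,
 *	};
 *
 *	register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &my_kfunc_set);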
*/ u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog) { enum bpf_prog_type prog_type = resolve_prog_type(prog); enum btf_kfunc_hook hook; u32 *kfunc_flags; kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); if (kfunc_flags) return kfunc_flags; hook = bpf_prog_type_to_kfunc_hook(prog_type); return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog); } u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog) { return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog); } static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, const struct btf_kfunc_id_set *kset) { struct btf *btf; int ret, i; btf = btf_get_module_btf(kset->owner); if (!btf) { if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { pr_err("missing vmlinux BTF, cannot register kfuncs\n"); return -ENOENT; } if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) pr_warn("missing module BTF, cannot register kfuncs\n"); return 0; } if (IS_ERR(btf)) return PTR_ERR(btf); for (i = 0; i < kset->set->cnt; i++) { ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, kset->set->pairs[i].flags); if (ret) goto err_out; } ret = btf_populate_kfunc_set(btf, hook, kset); err_out: btf_put(btf); return ret; } /* This function must be invoked only from initcalls/module init functions */ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *kset) { enum btf_kfunc_hook hook; hook = bpf_prog_type_to_kfunc_hook(prog_type); return __register_btf_kfunc_id_set(hook, kset); } EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); /* This function must be invoked only from initcalls/module init functions */ int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset) { return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset); } EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) { struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; struct btf_id_dtor_kfunc *dtor; if (!tab) return -ENOENT; /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func. */ BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0); dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func); if (!dtor) return -ENOENT; return dtor->kfunc_btf_id; } static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt) { const struct btf_type *dtor_func, *dtor_func_proto, *t; const struct btf_param *args; s32 dtor_btf_id; u32 nr_args, i; for (i = 0; i < cnt; i++) { dtor_btf_id = dtors[i].kfunc_btf_id; dtor_func = btf_type_by_id(btf, dtor_btf_id); if (!dtor_func || !btf_type_is_func(dtor_func)) return -EINVAL; dtor_func_proto = btf_type_by_id(btf, dtor_func->type); if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto)) return -EINVAL; /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */ t = btf_type_by_id(btf, dtor_func_proto->type); if (!t || !btf_type_is_void(t)) return -EINVAL; nr_args = btf_type_vlen(dtor_func_proto); if (nr_args != 1) return -EINVAL; args = btf_params(dtor_func_proto); t = btf_type_by_id(btf, args[0].type); /* Allow any pointer type, as width on targets Linux supports * will be same for all pointer types (i.e. 
sizeof(void *)) */ if (!t || !btf_type_is_ptr(t)) return -EINVAL; } return 0; } /* This function must be invoked only from initcalls/module init functions */ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner) { struct btf_id_dtor_kfunc_tab *tab; struct btf *btf; u32 tab_cnt; int ret; btf = btf_get_module_btf(owner); if (!btf) { if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n"); return -ENOENT; } if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { pr_err("missing module BTF, cannot register dtor kfuncs\n"); return -ENOENT; } return 0; } if (IS_ERR(btf)) return PTR_ERR(btf); if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); ret = -E2BIG; goto end; } /* Ensure that the prototype of dtor kfuncs being registered is sane */ ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt); if (ret < 0) goto end; tab = btf->dtor_kfunc_tab; /* Only one call allowed for modules */ if (WARN_ON_ONCE(tab && btf_is_module(btf))) { ret = -EINVAL; goto end; } tab_cnt = tab ? tab->cnt : 0; if (tab_cnt > U32_MAX - add_cnt) { ret = -EOVERFLOW; goto end; } if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); ret = -E2BIG; goto end; } tab = krealloc(btf->dtor_kfunc_tab, offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]), GFP_KERNEL | __GFP_NOWARN); if (!tab) { ret = -ENOMEM; goto end; } if (!btf->dtor_kfunc_tab) tab->cnt = 0; btf->dtor_kfunc_tab = tab; memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); tab->cnt += add_cnt; sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); end: if (ret) btf_free_dtor_kfunc_tab(btf); btf_put(btf); return ret; } EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs); #define MAX_TYPES_ARE_COMPAT_DEPTH 2 /* Check local and target types for compatibility. This check is used for * type-based CO-RE relocations and follow slightly different rules than * field-based relocations. This function assumes that root types were already * checked for name match. Beyond that initial root-level name check, names * are completely ignored. Compatibility rules are as follows: * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but * kind should match for local and target types (i.e., STRUCT is not * compatible with UNION); * - for ENUMs/ENUM64s, the size is ignored; * - for INT, size and signedness are ignored; * - for ARRAY, dimensionality is ignored, element types are checked for * compatibility recursively; * - CONST/VOLATILE/RESTRICT modifiers are ignored; * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; * - FUNC_PROTOs are compatible if they have compatible signature: same * number of input args and compatible return and argument types. * These rules are not set in stone and probably will be adjusted as we get * more experience with using BPF CO-RE relocations. 
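 * Illustrative consequences of these rules (hypothetical types, not from the
 * source): a local 'struct sk_buff___flavor { __u32 len; }' is compatible with
 * the target 'struct sk_buff', since only the kind is compared at this level;
 * an enum of size 1 is compatible with an enum of size 4; but a local STRUCT
 * never matches a target UNION.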
*/ int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, MAX_TYPES_ARE_COMPAT_DEPTH); } #define MAX_TYPES_MATCH_DEPTH 2 int bpf_core_types_match(const struct btf *local_btf, u32 local_id, const struct btf *targ_btf, u32 targ_id) { return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, MAX_TYPES_MATCH_DEPTH); } static bool bpf_core_is_flavor_sep(const char *s) { /* check X___Y name pattern, where X and Y are not underscores */ return s[0] != '_' && /* X */ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ s[4] != '_'; /* Y */ } size_t bpf_core_essential_name_len(const char *name) { size_t n = strlen(name); int i; for (i = n - 5; i >= 0; i--) { if (bpf_core_is_flavor_sep(name + i)) return i + 1; } return n; } struct bpf_cand_cache { const char *name; u32 name_len; u16 kind; u16 cnt; struct { const struct btf *btf; u32 id; } cands[]; }; static void bpf_free_cands(struct bpf_cand_cache *cands) { if (!cands->cnt) /* empty candidate array was allocated on stack */ return; kfree(cands); } static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands) { kfree(cands->name); kfree(cands); } #define VMLINUX_CAND_CACHE_SIZE 31 static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; #define MODULE_CAND_CACHE_SIZE 31 static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; static DEFINE_MUTEX(cand_cache_mutex); static void __print_cand_cache(struct bpf_verifier_log *log, struct bpf_cand_cache **cache, int cache_size) { struct bpf_cand_cache *cc; int i, j; for (i = 0; i < cache_size; i++) { cc = cache[i]; if (!cc) continue; bpf_log(log, "[%d]%s(", i, cc->name); for (j = 0; j < cc->cnt; j++) { bpf_log(log, "%d", cc->cands[j].id); if (j < cc->cnt - 1) bpf_log(log, " "); } bpf_log(log, "), "); } } static void print_cand_cache(struct bpf_verifier_log *log) { mutex_lock(&cand_cache_mutex); bpf_log(log, "vmlinux_cand_cache:"); __print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); bpf_log(log, "\nmodule_cand_cache:"); __print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE); bpf_log(log, "\n"); mutex_unlock(&cand_cache_mutex); } static u32 hash_cands(struct bpf_cand_cache *cands) { return jhash(cands->name, cands->name_len, 0); } static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands, struct bpf_cand_cache **cache, int cache_size) { struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size]; if (cc && cc->name_len == cands->name_len && !strncmp(cc->name, cands->name, cands->name_len)) return cc; return NULL; } static size_t sizeof_cands(int cnt) { return offsetof(struct bpf_cand_cache, cands[cnt]); } static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, struct bpf_cand_cache **cache, int cache_size) { struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands; if (*cc) { bpf_free_cands_from_cache(*cc); *cc = NULL; } new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); } /* strdup the name, since it will stay in cache. * the cands->name points to strings in prog's BTF and the prog can be unloaded. 
*/ new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); bpf_free_cands(cands); if (!new_cands->name) { kfree(new_cands); return ERR_PTR(-ENOMEM); } *cc = new_cands; return new_cands; } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, int cache_size) { struct bpf_cand_cache *cc; int i, j; for (i = 0; i < cache_size; i++) { cc = cache[i]; if (!cc) continue; if (!btf) { /* when new module is loaded purge all of module_cand_cache, * since new module might have candidates with the name * that matches cached cands. */ bpf_free_cands_from_cache(cc); cache[i] = NULL; continue; } /* when module is unloaded purge cache entries * that match module's btf */ for (j = 0; j < cc->cnt; j++) if (cc->cands[j].btf == btf) { bpf_free_cands_from_cache(cc); cache[i] = NULL; break; } } } static void purge_cand_cache(struct btf *btf) { mutex_lock(&cand_cache_mutex); __purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE); mutex_unlock(&cand_cache_mutex); } #endif static struct bpf_cand_cache * bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, int targ_start_id) { struct bpf_cand_cache *new_cands; const struct btf_type *t; const char *targ_name; size_t targ_essent_len; int n, i; n = btf_nr_types(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf_type_by_id(targ_btf, i); if (btf_kind(t) != cands->kind) continue; targ_name = btf_name_by_offset(targ_btf, t->name_off); if (!targ_name) continue; /* the resched point is before strncmp to make sure that search * for non-existing name will have a chance to schedule(). */ cond_resched(); if (strncmp(cands->name, targ_name, cands->name_len) != 0) continue; targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != cands->name_len) continue; /* most of the time there is only one candidate for a given kind+name pair */ new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); } memcpy(new_cands, cands, sizeof_cands(cands->cnt)); bpf_free_cands(cands); cands = new_cands; cands->cands[cands->cnt].btf = targ_btf; cands->cands[cands->cnt].id = i; cands->cnt++; } return cands; } static struct bpf_cand_cache * bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id) { struct bpf_cand_cache *cands, *cc, local_cand = {}; const struct btf *local_btf = ctx->btf; const struct btf_type *local_type; const struct btf *main_btf; size_t local_essent_len; struct btf *mod_btf; const char *name; int id; main_btf = bpf_get_btf_vmlinux(); if (IS_ERR(main_btf)) return ERR_CAST(main_btf); if (!main_btf) return ERR_PTR(-EINVAL); local_type = btf_type_by_id(local_btf, local_type_id); if (!local_type) return ERR_PTR(-EINVAL); name = btf_name_by_offset(local_btf, local_type->name_off); if (str_is_empty(name)) return ERR_PTR(-EINVAL); local_essent_len = bpf_core_essential_name_len(name); cands = &local_cand; cands->name = name; cands->kind = btf_kind(local_type); cands->name_len = local_essent_len; cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); /* cands is a pointer to stack here */ if (cc) { if (cc->cnt) return cc; goto check_modules; } /* Attempt to find target candidates in vmlinux BTF first */ cands = bpf_core_add_cands(cands, main_btf, 1); if (IS_ERR(cands)) return ERR_CAST(cands); /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ /* populate cache even when cands->cnt == 0 */ cc = populate_cand_cache(cands, 
vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); if (IS_ERR(cc)) return ERR_CAST(cc); /* if vmlinux BTF has any candidate, don't go for module BTFs */ if (cc->cnt) return cc; check_modules: /* cands is a pointer to stack here and cands->cnt == 0 */ cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); if (cc) /* if cache has it return it even if cc->cnt == 0 */ return cc; /* If candidate is not found in vmlinux's BTF then search in module's BTFs */ spin_lock_bh(&btf_idr_lock); idr_for_each_entry(&btf_idr, mod_btf, id) { if (!btf_is_module(mod_btf)) continue; /* linear search could be slow hence unlock/lock * the IDR to avoiding holding it for too long */ btf_get(mod_btf); spin_unlock_bh(&btf_idr_lock); cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); btf_put(mod_btf); if (IS_ERR(cands)) return ERR_CAST(cands); spin_lock_bh(&btf_idr_lock); } spin_unlock_bh(&btf_idr_lock); /* cands is a pointer to kmalloced memory here if cands->cnt > 0 * or pointer to stack if cands->cnd == 0. * Copy it into the cache even when cands->cnt == 0 and * return the result. */ return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); } int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn) { bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; struct bpf_core_cand_list cands = {}; struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; int err; /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" * into arrays of btf_ids of struct fields and array indices. */ specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); if (!specs) return -ENOMEM; if (need_cands) { struct bpf_cand_cache *cc; int i; mutex_lock(&cand_cache_mutex); cc = bpf_core_find_cands(ctx, relo->type_id); if (IS_ERR(cc)) { bpf_log(ctx->log, "target candidate search failed for %d\n", relo->type_id); err = PTR_ERR(cc); goto out; } if (cc->cnt) { cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); if (!cands.cands) { err = -ENOMEM; goto out; } } for (i = 0; i < cc->cnt; i++) { bpf_log(ctx->log, "CO-RE relocating %s %s: found target candidate [%d]\n", btf_kind_str[cc->kind], cc->name, cc->cands[i].id); cands.cands[i].btf = cc->cands[i].btf; cands.cands[i].id = cc->cands[i].id; } cands.len = cc->cnt; /* cand_cache_mutex needs to span the cache lookup and * copy of btf pointer into bpf_core_cand_list, * since module can be unloaded while bpf_core_calc_relo_insn * is working with module's btf. 
*/ } err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs, &targ_res); if (err) goto out; err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx, &targ_res); out: kfree(specs); if (need_cands) { kfree(cands.cands); mutex_unlock(&cand_cache_mutex); if (ctx->log->level & BPF_LOG_LEVEL2) print_cand_cache(ctx->log); } return err; } bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, const char *field_name, u32 btf_id, const char *suffix) { struct btf *btf = reg->btf; const struct btf_type *walk_type, *safe_type; const char *tname; char safe_tname[64]; long ret, safe_id; const struct btf_member *member; u32 i; walk_type = btf_type_by_id(btf, reg->btf_id); if (!walk_type) return false; tname = btf_name_by_offset(btf, walk_type->name_off); ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); if (ret >= sizeof(safe_tname)) return false; safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info)); if (safe_id < 0) return false; safe_type = btf_type_by_id(btf, safe_id); if (!safe_type) return false; for_each_member(i, safe_type, member) { const char *m_name = __btf_name_by_offset(btf, member->name_off); const struct btf_type *mtype = btf_type_by_id(btf, member->type); u32 id; if (!btf_type_is_ptr(mtype)) continue; btf_type_skip_modifiers(btf, mtype->type, &id); /* If we match on both type and name, the field is considered trusted. */ if (btf_id == id && !strcmp(field_name, m_name)) return true; } return false; } bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, const struct btf *reg_btf, u32 reg_id, const struct btf *arg_btf, u32 arg_id) { const char *reg_name, *arg_name, *search_needle; const struct btf_type *reg_type, *arg_type; int reg_len, arg_len, cmp_len; size_t pattern_len = sizeof(NOCAST_ALIAS_SUFFIX) - sizeof(char); reg_type = btf_type_by_id(reg_btf, reg_id); if (!reg_type) return false; arg_type = btf_type_by_id(arg_btf, arg_id); if (!arg_type) return false; reg_name = btf_name_by_offset(reg_btf, reg_type->name_off); arg_name = btf_name_by_offset(arg_btf, arg_type->name_off); reg_len = strlen(reg_name); arg_len = strlen(arg_name); /* Exactly one of the two type names may be suffixed with ___init, so * if the strings are the same size, they can't possibly be no-cast * aliases of one another. If you have two of the same type names, e.g. * they're both nf_conn___init, it would be improper to return true * because they are _not_ no-cast aliases, they are the same type. */ if (reg_len == arg_len) return false; /* Either of the two names must be the other name, suffixed with ___init. */ if ((reg_len != arg_len + pattern_len) && (arg_len != reg_len + pattern_len)) return false; if (reg_len < arg_len) { search_needle = strstr(arg_name, NOCAST_ALIAS_SUFFIX); cmp_len = reg_len; } else { search_needle = strstr(reg_name, NOCAST_ALIAS_SUFFIX); cmp_len = arg_len; } if (!search_needle) return false; /* ___init suffix must come at the end of the name */ if (*(search_needle + pattern_len) != '\0') return false; return !strncmp(reg_name, arg_name, cmp_len); } |
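/*
 * Illustrative userspace sketch (not part of the kernel sources above): it
 * exercises the BPF_BTF_GET_FD_BY_ID and BPF_OBJ_GET_INFO_BY_FD commands that
 * are served by btf_get_fd_by_id() and btf_get_info_by_fd() in the code above.
 * The BTF object id 1 (usually vmlinux) and the 64-byte name buffer are
 * assumptions for brevity; real code would walk ids with BPF_BTF_GET_NEXT_ID
 * and retry with a larger buffer if the kernel returns -ENOSPC for a
 * truncated name.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long bpf_sys(enum bpf_cmd cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	union bpf_attr attr;
	struct bpf_btf_info info;
	char name[64];
	long fd;

	/* Turn a BTF object id into an fd (kernel side: btf_get_fd_by_id()). */
	memset(&attr, 0, sizeof(attr));
	attr.btf_id = 1;			/* assumed: vmlinux BTF */
	fd = bpf_sys(BPF_BTF_GET_FD_BY_ID, &attr);
	if (fd < 0)
		return 1;

	/* Query id, raw size and name (kernel side: btf_get_info_by_fd()). */
	memset(&info, 0, sizeof(info));
	memset(name, 0, sizeof(name));
	info.name = (__u64)(unsigned long)name;
	info.name_len = sizeof(name);

	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;
	if (bpf_sys(BPF_OBJ_GET_INFO_BY_FD, &attr)) {
		close(fd);
		return 1;
	}

	printf("btf id %u: %u bytes, name '%s', kernel_btf=%u\n",
	       info.id, info.btf_size, name, info.kernel_btf);
	close(fd);
	return 0;
}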
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/isofs/inode.c * * (C) 1991 Linus Torvalds - minix filesystem * 1992, 1993, 1994 Eric Youngdale Modified for ISO 9660 filesystem. * 1994 Eberhard Mönkeberg - multi session handling. * 1995 Mark Dobie - allow mounting of some weird VideoCDs and PhotoCDs. * 1997 Gordon Chaffee - Joliet CDs * 1998 Eric Lammerts - ISO 9660 Level 3 * 2004 Paul Serice - Inode Support pushed out from 4GB to 128GB * 2004 Paul Serice - NFS Export Operations */ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/nls.h> #include <linux/ctype.h> #include <linux/statfs.h> #include <linux/cdrom.h> #include <linux/parser.h> #include <linux/mpage.h> #include <linux/user_namespace.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include "isofs.h" #include "zisofs.h" /* max tz offset is 13 hours */ #define MAX_TZ_OFFSET (52*15*60) #define BEQUIET static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); static int isofs_hash_ms(const struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi_ms(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); static int isofs_dentry_cmp_ms(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #endif static void isofs_put_super(struct super_block *sb) { struct isofs_sb_info *sbi = ISOFS_SB(sb); #ifdef CONFIG_JOLIET unload_nls(sbi->s_nls_iocharset); #endif kfree(sbi); sb->s_fs_info = NULL; return; } static int isofs_read_inode(struct inode *, int relocated); static int isofs_statfs (struct dentry *, struct kstatfs *); static int isofs_show_options(struct seq_file *, struct dentry *); static struct kmem_cache *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { struct iso_inode_info *ei; ei = alloc_inode_sb(sb, isofs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void isofs_free_inode(struct inode *inode) { kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } static void init_once(void *foo) { struct iso_inode_info *ei = foo; inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", sizeof(struct iso_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); if (!isofs_inode_cachep) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache.
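 * (rcu_barrier() waits for every outstanding call_rcu() callback, including
 * the VFS callback that eventually invokes isofs_free_inode(), so no inode
 * from this cache can still be queued for freeing when kmem_cache_destroy()
 * runs; synchronize_rcu() alone would not provide that guarantee.)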
*/ rcu_barrier(); kmem_cache_destroy(isofs_inode_cachep); } static int isofs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); if (!(*flags & SB_RDONLY)) return -EROFS; return 0; } static const struct super_operations isofs_sops = { .alloc_inode = isofs_alloc_inode, .free_inode = isofs_free_inode, .put_super = isofs_put_super, .statfs = isofs_statfs, .remount_fs = isofs_remount, .show_options = isofs_show_options, }; static const struct dentry_operations isofs_dentry_ops[] = { { .d_hash = isofs_hashi, .d_compare = isofs_dentry_cmpi, }, #ifdef CONFIG_JOLIET { .d_hash = isofs_hash_ms, .d_compare = isofs_dentry_cmp_ms, }, { .d_hash = isofs_hashi_ms, .d_compare = isofs_dentry_cmpi_ms, }, #endif }; struct iso9660_options{ unsigned int rock:1; unsigned int joliet:1; unsigned int cruft:1; unsigned int hide:1; unsigned int showassoc:1; unsigned int nocompress:1; unsigned int overriderockperm:1; unsigned int uid_set:1; unsigned int gid_set:1; unsigned char map; unsigned char check; unsigned int blocksize; umode_t fmode; umode_t dmode; kgid_t gid; kuid_t uid; char *iocharset; /* LVE */ s32 session; s32 sbsector; }; /* * Compute the hash for the isofs name corresponding to the dentry. */ static int isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; char c; unsigned long hash; len = qstr->len; name = qstr->name; if (ms) { while (len && name[len-1] == '.') len--; } hash = init_name_hash(dentry); while (len--) { c = tolower(*name++); hash = partial_name_hash(c, hash); } qstr->hash = end_name_hash(hash); return 0; } /* * Compare of two isofs names. */ static int isofs_dentry_cmp_common( unsigned int len, const char *str, const struct qstr *name, int ms, int ci) { int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ alen = name->len; blen = len; if (ms) { while (alen && name->name[alen-1] == '.') alen--; while (blen && str[blen-1] == '.') blen--; } if (alen == blen) { if (ci) { if (strncasecmp(name->name, str, alen) == 0) return 0; } else { if (strncmp(name->name, str, alen) == 0) return 0; } } return 1; } static int isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 0); } static int isofs_dentry_cmpi(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 0, 1); } #ifdef CONFIG_JOLIET /* * Compute the hash for the isofs name corresponding to the dentry. 
*/ static int isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; len = qstr->len; name = qstr->name; if (ms) { while (len && name[len-1] == '.') len--; } qstr->hash = full_name_hash(dentry, name, len); return 0; } static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 1); } static int isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 1); } static int isofs_dentry_cmp_ms(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 1, 0); } static int isofs_dentry_cmpi_ms(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 1, 1); } #endif enum { Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, Opt_overriderockperm, }; static const match_table_t tokens = { {Opt_norock, "norock"}, {Opt_nojoliet, "nojoliet"}, {Opt_unhide, "unhide"}, {Opt_hide, "hide"}, {Opt_showassoc, "showassoc"}, {Opt_cruft, "cruft"}, {Opt_utf8, "utf8"}, {Opt_iocharset, "iocharset=%s"}, {Opt_map_a, "map=acorn"}, {Opt_map_a, "map=a"}, {Opt_map_n, "map=normal"}, {Opt_map_n, "map=n"}, {Opt_map_o, "map=off"}, {Opt_map_o, "map=o"}, {Opt_session, "session=%u"}, {Opt_sb, "sbsector=%u"}, {Opt_check_r, "check=relaxed"}, {Opt_check_r, "check=r"}, {Opt_check_s, "check=strict"}, {Opt_check_s, "check=s"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%u"}, {Opt_dmode, "dmode=%u"}, {Opt_overriderockperm, "overriderockperm"}, {Opt_block, "block=%u"}, {Opt_ignore, "conv=binary"}, {Opt_ignore, "conv=b"}, {Opt_ignore, "conv=text"}, {Opt_ignore, "conv=t"}, {Opt_ignore, "conv=mtext"}, {Opt_ignore, "conv=m"}, {Opt_ignore, "conv=auto"}, {Opt_ignore, "conv=a"}, {Opt_nocompress, "nocompress"}, {Opt_err, NULL} }; static int parse_options(char *options, struct iso9660_options *popt) { char *p; int option; unsigned int uv; popt->map = 'n'; popt->rock = 1; popt->joliet = 1; popt->cruft = 0; popt->hide = 0; popt->showassoc = 0; popt->check = 'u'; /* unset */ popt->nocompress = 0; popt->blocksize = 1024; popt->fmode = popt->dmode = ISOFS_INVALID_MODE; popt->uid_set = 0; popt->gid_set = 0; popt->gid = GLOBAL_ROOT_GID; popt->uid = GLOBAL_ROOT_UID; popt->iocharset = NULL; popt->overriderockperm = 0; popt->session=-1; popt->sbsector=-1; if (!options) return 1; while ((p = strsep(&options, ",")) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; unsigned n; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_norock: popt->rock = 0; break; case Opt_nojoliet: popt->joliet = 0; break; case Opt_hide: popt->hide = 1; break; case Opt_unhide: case Opt_showassoc: popt->showassoc = 1; break; case Opt_cruft: popt->cruft = 1; break; #ifdef CONFIG_JOLIET case Opt_utf8: kfree(popt->iocharset); popt->iocharset = kstrdup("utf8", GFP_KERNEL); if (!popt->iocharset) return 0; break; case Opt_iocharset: kfree(popt->iocharset); popt->iocharset = match_strdup(&args[0]); if (!popt->iocharset) return 0; break; #endif case Opt_map_a: popt->map = 'a'; break; case Opt_map_o: popt->map = 'o'; break; case Opt_map_n: popt->map = 'n'; break; case Opt_session: if (match_int(&args[0], &option)) return 0; n = 
option; /* * Track numbers are supposed to be in range 1-99, the * mount option starts indexing at 0. */ if (n >= 99) return 0; popt->session = n + 1; break; case Opt_sb: if (match_int(&args[0], &option)) return 0; popt->sbsector = option; break; case Opt_check_r: popt->check = 'r'; break; case Opt_check_s: popt->check = 's'; break; case Opt_ignore: break; case Opt_uid: if (match_uint(&args[0], &uv)) return 0; popt->uid = make_kuid(current_user_ns(), uv); if (!uid_valid(popt->uid)) return 0; popt->uid_set = 1; break; case Opt_gid: if (match_uint(&args[0], &uv)) return 0; popt->gid = make_kgid(current_user_ns(), uv); if (!gid_valid(popt->gid)) return 0; popt->gid_set = 1; break; case Opt_mode: if (match_int(&args[0], &option)) return 0; popt->fmode = option; break; case Opt_dmode: if (match_int(&args[0], &option)) return 0; popt->dmode = option; break; case Opt_overriderockperm: popt->overriderockperm = 1; break; case Opt_block: if (match_int(&args[0], &option)) return 0; n = option; if (n != 512 && n != 1024 && n != 2048) return 0; popt->blocksize = n; break; case Opt_nocompress: popt->nocompress = 1; break; default: return 0; } } return 1; } /* * Display the mount options in /proc/mounts. */ static int isofs_show_options(struct seq_file *m, struct dentry *root) { struct isofs_sb_info *sbi = ISOFS_SB(root->d_sb); if (!sbi->s_rock) seq_puts(m, ",norock"); else if (!sbi->s_joliet_level) seq_puts(m, ",nojoliet"); if (sbi->s_cruft) seq_puts(m, ",cruft"); if (sbi->s_hide) seq_puts(m, ",hide"); if (sbi->s_nocompress) seq_puts(m, ",nocompress"); if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm"); if (sbi->s_showassoc) seq_puts(m, ",showassoc"); if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check); if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping); if (sbi->s_session != 255) seq_printf(m, ",session=%u", sbi->s_session - 1); if (sbi->s_sbsector != -1) seq_printf(m, ",sbsector=%u", sbi->s_sbsector); if (root->d_sb->s_blocksize != 1024) seq_printf(m, ",blocksize=%lu", root->d_sb->s_blocksize); if (sbi->s_uid_set) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->s_uid)); if (sbi->s_gid_set) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->s_gid)); if (sbi->s_dmode != ISOFS_INVALID_MODE) seq_printf(m, ",dmode=%o", sbi->s_dmode); if (sbi->s_fmode != ISOFS_INVALID_MODE) seq_printf(m, ",fmode=%o", sbi->s_fmode); #ifdef CONFIG_JOLIET if (sbi->s_nls_iocharset) seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); else seq_puts(m, ",iocharset=utf8"); #endif return 0; } /* * look if the driver can tell the multi session redirection value * * don't change this if you don't know what you do, please! * Multisession is legal only with XA disks. * A non-XA disk with more than one volume descriptor may do it right, but * usually is written in a nowhere standardized "multi-partition" manner. * Multisession uses absolute addressing (solely the first frame of the whole * track is #0), multi-partition uses relative addressing (each first frame of * each track is #0), and a track is not a session. * * A broken CDwriter software or drive firmware does not set new standards, * at least not if conflicting with the existing ones. 
* * emoenke@gwdg.de */ #define WE_OBEY_THE_WRITTEN_STANDARDS 1 static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); unsigned int vol_desc_start = 0; if (session > 0) { struct cdrom_tocentry te; if (!cdi) return 0; te.cdte_track = session; te.cdte_format = CDROM_LBA; if (cdrom_read_tocentry(cdi, &te) == 0) { printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n", session, te.cdte_addr.lba, te.cdte_ctrl & CDROM_DATA_TRACK); if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4) return te.cdte_addr.lba; } printk(KERN_ERR "ISOFS: Invalid session number or type of track\n"); } if (cdi) { struct cdrom_multisession ms_info; ms_info.addr_format = CDROM_LBA; if (cdrom_multisession(cdi, &ms_info) == 0) { #if WE_OBEY_THE_WRITTEN_STANDARDS /* necessary for a valid ms_info.addr */ if (ms_info.xa_flag) #endif vol_desc_start = ms_info.addr.lba; } } return vol_desc_start; } /* * Check if root directory is empty (has less than 3 files). * * Used to detect broken CDs where ISO root directory is empty but Joliet root * directory is OK. If such CD has Rock Ridge extensions, they will be disabled * (and Joliet used instead) or else no files would be visible. */ static bool rootdir_empty(struct super_block *sb, unsigned long block) { int offset = 0, files = 0, de_len; struct iso_directory_record *de; struct buffer_head *bh; bh = sb_bread(sb, block); if (!bh) return true; while (files < 3) { de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; if (de_len == 0) break; files++; offset += de_len; } brelse(bh); return files < 3; } /* * Initialize the superblock and read the root inode. */ static int isofs_fill_super(struct super_block *s, void *data, int silent) { struct buffer_head *bh = NULL, *pri_bh = NULL; struct hs_primary_descriptor *h_pri = NULL; struct iso_primary_descriptor *pri = NULL; struct iso_supplementary_descriptor *sec = NULL; struct iso_directory_record *rootp; struct inode *inode; struct iso9660_options opt; struct isofs_sb_info *sbi; unsigned long first_data_zone; int joliet_level = 0; int iso_blknum, block; int orig_zonesize; int table, error = -EINVAL; unsigned int vol_desc_start; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; s->s_fs_info = sbi; if (!parse_options((char *)data, &opt)) goto out_freesbi; /* * First of all, get the hardware blocksize for this device. * If we don't know what it is, or the hardware blocksize is * larger than the blocksize the user specified, then use * that value. */ /* * What if bugger tells us to go beyond page size? */ if (bdev_logical_block_size(s->s_bdev) > 2048) { printk(KERN_WARNING "ISOFS: unsupported/invalid hardware sector size %d\n", bdev_logical_block_size(s->s_bdev)); goto out_freesbi; } opt.blocksize = sb_min_blocksize(s, opt.blocksize); sbi->s_high_sierra = 0; /* default is iso9660 */ sbi->s_session = opt.session; sbi->s_sbsector = opt.sbsector; vol_desc_start = (opt.sbsector != -1) ? 
opt.sbsector : isofs_get_last_session(s,opt.session); for (iso_blknum = vol_desc_start+16; iso_blknum < vol_desc_start+100; iso_blknum++) { struct hs_volume_descriptor *hdp; struct iso_volume_descriptor *vdp; block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); if (!(bh = sb_bread(s, block))) goto out_no_read; vdp = (struct iso_volume_descriptor *)bh->b_data; hdp = (struct hs_volume_descriptor *)bh->b_data; /* * Due to the overlapping physical location of the descriptors, * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure * proper identification in this case, we first check for ISO. */ if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { if (isonum_711(vdp->type) == ISO_VD_END) break; if (isonum_711(vdp->type) == ISO_VD_PRIMARY) { if (!pri) { pri = (struct iso_primary_descriptor *)vdp; /* Save the buffer in case we need it ... */ pri_bh = bh; bh = NULL; } } #ifdef CONFIG_JOLIET else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { sec = (struct iso_supplementary_descriptor *)vdp; if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { if (opt.joliet) { if (sec->escape[2] == 0x40) joliet_level = 1; else if (sec->escape[2] == 0x43) joliet_level = 2; else if (sec->escape[2] == 0x45) joliet_level = 3; printk(KERN_DEBUG "ISO 9660 Extensions: " "Microsoft Joliet Level %d\n", joliet_level); } goto root_found; } else { /* Unknown supplementary volume descriptor */ sec = NULL; } } #endif } else { if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { if (isonum_711(hdp->type) != ISO_VD_PRIMARY) goto out_freebh; sbi->s_high_sierra = 1; opt.rock = 0; h_pri = (struct hs_primary_descriptor *)vdp; goto root_found; } } /* Just skip any volume descriptors we don't recognize */ brelse(bh); bh = NULL; } /* * If we fall through, either no volume descriptor was found, * or else we passed a primary descriptor looking for others. */ if (!pri) goto out_unknown_format; brelse(bh); bh = pri_bh; pri_bh = NULL; root_found: /* We don't support read-write mounts */ if (!sb_rdonly(s)) { error = -EACCES; goto out_freebh; } if (joliet_level && (!pri || !opt.rock)) { /* This is the case of Joliet with the norock mount flag. * A disc with both Joliet and Rock Ridge is handled later */ pri = (struct iso_primary_descriptor *) sec; } if(sbi->s_high_sierra){ rootp = (struct iso_directory_record *) h_pri->root_directory_record; sbi->s_nzones = isonum_733(h_pri->volume_space_size); sbi->s_log_zone_size = isonum_723(h_pri->logical_block_size); sbi->s_max_size = isonum_733(h_pri->volume_space_size); } else { if (!pri) goto out_freebh; rootp = (struct iso_directory_record *) pri->root_directory_record; sbi->s_nzones = isonum_733(pri->volume_space_size); sbi->s_log_zone_size = isonum_723(pri->logical_block_size); sbi->s_max_size = isonum_733(pri->volume_space_size); } sbi->s_ninodes = 0; /* No way to figure this out easily */ orig_zonesize = sbi->s_log_zone_size; /* * If the zone size is smaller than the hardware sector size, * this is a fatal error. This would occur if the disc drive * had sectors that were 2048 bytes, but the filesystem had * blocks that were 512 bytes (which should only very rarely * happen.) 
*/ if (orig_zonesize < opt.blocksize) goto out_bad_size; /* RDE: convert log zone size to bit shift */ switch (sbi->s_log_zone_size) { case 512: sbi->s_log_zone_size = 9; break; case 1024: sbi->s_log_zone_size = 10; break; case 2048: sbi->s_log_zone_size = 11; break; default: goto out_bad_zone_size; } s->s_magic = ISOFS_SUPER_MAGIC; /* * With multi-extent files, file size is only limited by the maximum * size of a file system, which is 8 TB. */ s->s_maxbytes = 0x80000000000LL; /* ECMA-119 timestamp from 1900/1/1 with tz offset */ s->s_time_min = mktime64(1900, 1, 1, 0, 0, 0) - MAX_TZ_OFFSET; s->s_time_max = mktime64(U8_MAX+1900, 12, 31, 23, 59, 59) + MAX_TZ_OFFSET; /* Set this for reference. Its not currently used except on write which we don't have .. */ first_data_zone = isonum_733(rootp->extent) + isonum_711(rootp->ext_attr_length); sbi->s_firstdatazone = first_data_zone; #ifndef BEQUIET printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n", sbi->s_max_size, 1UL << sbi->s_log_zone_size); printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone); if(sbi->s_high_sierra) printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n"); #endif /* * If the Joliet level is set, we _may_ decide to use the * secondary descriptor, but can't be sure until after we * read the root inode. But before reading the root inode * we may need to change the device blocksize, and would * rather release the old buffer first. So, we cache the * first_data_zone value from the secondary descriptor. */ if (joliet_level) { pri = (struct iso_primary_descriptor *) sec; rootp = (struct iso_directory_record *) pri->root_directory_record; first_data_zone = isonum_733(rootp->extent) + isonum_711(rootp->ext_attr_length); } /* * We're all done using the volume descriptor, and may need * to change the device blocksize, so release the buffer now. */ brelse(pri_bh); brelse(bh); /* * Force the blocksize to 512 for 512 byte sectors. The file * read primitives really get it wrong in a bad way if we don't * do this. * * Note - we should never be setting the blocksize to something * less than the hardware sector size for the device. If we * do, we would end up having to read larger buffers and split * out portions to satisfy requests. * * Note2- the idea here is that we want to deal with the optimal * zonesize in the filesystem. If we have it set to something less, * then we have horrible problems with trying to piece together * bits of adjacent blocks in order to properly read directory * entries. By forcing the blocksize in this way, we ensure * that we will never be required to do this. */ sb_set_blocksize(s, orig_zonesize); sbi->s_nls_iocharset = NULL; #ifdef CONFIG_JOLIET if (joliet_level) { char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; if (strcmp(p, "utf8") != 0) { sbi->s_nls_iocharset = opt.iocharset ? load_nls(opt.iocharset) : load_nls_default(); if (!sbi->s_nls_iocharset) goto out_freesbi; } } #endif s->s_op = &isofs_sops; s->s_export_op = &isofs_export_ops; sbi->s_mapping = opt.map; sbi->s_rock = (opt.rock ? 
2 : 0); sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ sbi->s_cruft = opt.cruft; sbi->s_hide = opt.hide; sbi->s_showassoc = opt.showassoc; sbi->s_uid = opt.uid; sbi->s_gid = opt.gid; sbi->s_uid_set = opt.uid_set; sbi->s_gid_set = opt.gid_set; sbi->s_nocompress = opt.nocompress; sbi->s_overriderockperm = opt.overriderockperm; /* * It would be incredibly stupid to allow people to mark every file * on the disk as suid, so we merely allow them to set the default * permissions. */ if (opt.fmode != ISOFS_INVALID_MODE) sbi->s_fmode = opt.fmode & 0777; else sbi->s_fmode = ISOFS_INVALID_MODE; if (opt.dmode != ISOFS_INVALID_MODE) sbi->s_dmode = opt.dmode & 0777; else sbi->s_dmode = ISOFS_INVALID_MODE; /* * Read the root inode, which _may_ result in changing * the s_rock flag. Once we have the final s_rock value, * we then decide whether to use the Joliet descriptor. */ inode = isofs_iget(s, sbi->s_firstdatazone, 0); if (IS_ERR(inode)) goto out_no_root; /* * Fix for broken CDs with Rock Ridge and empty ISO root directory but * correct Joliet root directory. */ if (sbi->s_rock == 1 && joliet_level && rootdir_empty(s, sbi->s_firstdatazone)) { printk(KERN_NOTICE "ISOFS: primary root directory is empty. " "Disabling Rock Ridge and switching to Joliet."); sbi->s_rock = 0; } /* * If this disk has both Rock Ridge and Joliet on it, then we * want to use Rock Ridge by default. This can be overridden * by using the norock mount option. There is still one other * possibility that is not taken into account: a Rock Ridge * CD with Unicode names. Until someone sees such a beast, it * will not be supported. */ if (sbi->s_rock == 1) { joliet_level = 0; } else if (joliet_level) { sbi->s_rock = 0; if (sbi->s_firstdatazone != first_data_zone) { sbi->s_firstdatazone = first_data_zone; printk(KERN_DEBUG "ISOFS: changing to secondary root\n"); iput(inode); inode = isofs_iget(s, sbi->s_firstdatazone, 0); if (IS_ERR(inode)) goto out_no_root; } } if (opt.check == 'u') { /* Only Joliet is case insensitive by default */ if (joliet_level) opt.check = 'r'; else opt.check = 's'; } sbi->s_joliet_level = joliet_level; /* Make sure the root inode is a directory */ if (!S_ISDIR(inode->i_mode)) { printk(KERN_WARNING "isofs_fill_super: root inode is not a directory. " "Corrupted media?\n"); goto out_iput; } table = 0; if (joliet_level) table += 2; if (opt.check == 'r') table++; sbi->s_check = opt.check; if (table) s->s_d_op = &isofs_dentry_ops[table - 1]; /* get the root dentry */ s->s_root = d_make_root(inode); if (!(s->s_root)) { error = -ENOMEM; goto out_no_inode; } kfree(opt.iocharset); return 0; /* * Display error messages and free resources. 
*/ out_iput: iput(inode); goto out_no_inode; out_no_root: error = PTR_ERR(inode); if (error != -ENOMEM) printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_no_inode: #ifdef CONFIG_JOLIET unload_nls(sbi->s_nls_iocharset); #endif goto out_freesbi; out_no_read: printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", __func__, s->s_id, iso_blknum, block); goto out_freebh; out_bad_zone_size: printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", sbi->s_log_zone_size); goto out_freebh; out_bad_size: printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n", orig_zonesize, opt.blocksize); goto out_freebh; out_unknown_format: if (!silent) printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n"); out_freebh: brelse(bh); brelse(pri_bh); out_freesbi: kfree(opt.iocharset); kfree(sbi); s->s_fs_info = NULL; return error; } static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = ISOFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = (ISOFS_SB(sb)->s_nzones << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); buf->f_bfree = 0; buf->f_bavail = 0; buf->f_files = ISOFS_SB(sb)->s_ninodes; buf->f_ffree = 0; buf->f_fsid = u64_to_fsid(id); buf->f_namelen = NAME_MAX; return 0; } /* * Get a set of blocks; filling in buffer_heads if already allocated * or getblk() if they are not. Returns the number of blocks inserted * (-ve == error.) */ int isofs_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head **bh, unsigned long nblocks) { unsigned long b_off = iblock; unsigned offset, sect_size; unsigned int firstext; unsigned long nextblk, nextoff; int section, rv, error; struct iso_inode_info *ei = ISOFS_I(inode); error = -EIO; rv = 0; if (iblock != b_off) { printk(KERN_DEBUG "%s: block number too large\n", __func__); goto abort; } offset = 0; firstext = ei->i_first_extent; sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); nextblk = ei->i_next_section_block; nextoff = ei->i_next_section_offset; section = 0; while (nblocks) { /* If we are *way* beyond the end of the file, print a message. * Access beyond the end of the file up to the next page boundary * is normal, however because of the way the page cache works. * In this case, we just return 0 so that we can properly fill * the page with useless information without generating any * I/O errors. */ if (b_off > ((inode->i_size + PAGE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n", __func__, b_off, (unsigned long long)inode->i_size); goto abort; } /* On the last section, nextblk == 0, section size is likely to * exceed sect_size by a partial block, and access beyond the * end of the file will reach beyond the section size, too. */ while (nextblk && (b_off >= (offset + sect_size))) { struct inode *ninode; offset += sect_size; ninode = isofs_iget(inode->i_sb, nextblk, nextoff); if (IS_ERR(ninode)) { error = PTR_ERR(ninode); goto abort; } firstext = ISOFS_I(ninode)->i_first_extent; sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); nextblk = ISOFS_I(ninode)->i_next_section_block; nextoff = ISOFS_I(ninode)->i_next_section_offset; iput(ninode); if (++section > 100) { printk(KERN_DEBUG "%s: More than 100 file sections ?!?" 
" aborting...\n", __func__); printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u " "nextblk=%lu nextoff=%lu\n", __func__, b_off, firstext, (unsigned) sect_size, nextblk, nextoff); goto abort; } } if (*bh) { map_bh(*bh, inode->i_sb, firstext + b_off - offset); } else { *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); if (!*bh) goto abort; } bh++; /* Next buffer head */ b_off++; /* Next buffer offset */ nblocks--; rv++; } error = 0; abort: return rv != 0 ? rv : error; } /* * Used by the standard interfaces. */ static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret; if (create) { printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__); return -EROFS; } ret = isofs_get_blocks(inode, iblock, &bh_result, 1); return ret < 0 ? ret : 0; } static int isofs_bmap(struct inode *inode, sector_t block) { struct buffer_head dummy; int error; dummy.b_state = 0; dummy.b_blocknr = -1000; error = isofs_get_block(inode, block, &dummy, 0); if (!error) return dummy.b_blocknr; return 0; } struct buffer_head *isofs_bread(struct inode *inode, sector_t block) { sector_t blknr = isofs_bmap(inode, block); if (!blknr) return NULL; return sb_bread(inode->i_sb, blknr); } static int isofs_read_folio(struct file *file, struct folio *folio) { return mpage_read_folio(folio, isofs_get_block); } static void isofs_readahead(struct readahead_control *rac) { mpage_readahead(rac, isofs_get_block); } static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,isofs_get_block); } static const struct address_space_operations isofs_aops = { .read_folio = isofs_read_folio, .readahead = isofs_readahead, .bmap = _isofs_bmap }; static int isofs_read_level3_size(struct inode *inode) { unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; struct buffer_head *bh = NULL; unsigned long block, offset, block_saved, offset_saved; int i = 0; int more_entries = 0; struct iso_directory_record *tmpde = NULL; struct iso_inode_info *ei = ISOFS_I(inode); inode->i_size = 0; /* The first 16 blocks are reserved as the System Area. Thus, * no inodes can appear in block 0. We use this to flag that * this is the last section. 
*/ ei->i_next_section_block = 0; ei->i_next_section_offset = 0; block = ei->i_iget5_block; offset = ei->i_iget5_offset; do { struct iso_directory_record *de; unsigned int de_len; if (!bh) { bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; } de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; if (de_len == 0) { brelse(bh); bh = NULL; ++block; offset = 0; continue; } block_saved = block; offset_saved = offset; offset += de_len; /* Make sure we have a full directory entry */ if (offset >= bufsize) { int slop = bufsize - offset + de_len; if (!tmpde) { tmpde = kmalloc(256, GFP_KERNEL); if (!tmpde) goto out_nomem; } memcpy(tmpde, de, slop); offset &= bufsize - 1; block++; brelse(bh); bh = NULL; if (offset) { bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; memcpy((void *)tmpde+slop, bh->b_data, offset); } de = tmpde; } inode->i_size += isonum_733(de->size); if (i == 1) { ei->i_next_section_block = block_saved; ei->i_next_section_offset = offset_saved; } more_entries = de->flags[-high_sierra] & 0x80; i++; if (i > 100) goto out_toomany; } while (more_entries); out: kfree(tmpde); brelse(bh); return 0; out_nomem: brelse(bh); return -ENOMEM; out_noread: printk(KERN_INFO "ISOFS: unable to read i-node block %lu\n", block); kfree(tmpde); return -EIO; out_toomany: printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n" "isofs_read_level3_size: inode=%lu\n", __func__, inode->i_ino); goto out; } static int isofs_read_inode(struct inode *inode, int relocated) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); unsigned long block; int high_sierra = sbi->s_high_sierra; struct buffer_head *bh; struct iso_directory_record *de; struct iso_directory_record *tmpde = NULL; unsigned int de_len; unsigned long offset; struct iso_inode_info *ei = ISOFS_I(inode); int ret = -EIO; block = ei->i_iget5_block; bh = sb_bread(inode->i_sb, block); if (!bh) goto out_badread; offset = ei->i_iget5_offset; de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; if (de_len < sizeof(struct iso_directory_record)) goto fail; if (offset + de_len > bufsize) { int frag1 = bufsize - offset; tmpde = kmalloc(de_len, GFP_KERNEL); if (!tmpde) { ret = -ENOMEM; goto fail; } memcpy(tmpde, bh->b_data + offset, frag1); brelse(bh); bh = sb_bread(inode->i_sb, ++block); if (!bh) goto out_badread; memcpy((char *)tmpde+frag1, bh->b_data, de_len - frag1); de = tmpde; } inode->i_ino = isofs_get_ino(ei->i_iget5_block, ei->i_iget5_offset, ISOFS_BUFFER_BITS(inode)); /* Assume it is a normal-format file unless told otherwise */ ei->i_file_format = isofs_file_normal; if (de->flags[-high_sierra] & 2) { if (sbi->s_dmode != ISOFS_INVALID_MODE) inode->i_mode = S_IFDIR | sbi->s_dmode; else inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; set_nlink(inode, 1); /* * Set to 1. We know there are 2, but * the find utility tries to optimize * if it is 2, and it screws up. It is * easier to give 1 which tells find to * do it the hard way. */ } else { if (sbi->s_fmode != ISOFS_INVALID_MODE) { inode->i_mode = S_IFREG | sbi->s_fmode; } else { /* * Set default permissions: r-x for all. The disc * could be shared with DOS machines so virtually * anything could be a valid executable. 
*/ inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; } set_nlink(inode, 1); } inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; inode->i_blocks = 0; ei->i_format_parm[0] = 0; ei->i_format_parm[1] = 0; ei->i_format_parm[2] = 0; ei->i_section_size = isonum_733(de->size); if (de->flags[-high_sierra] & 0x80) { ret = isofs_read_level3_size(inode); if (ret < 0) goto fail; ret = -EIO; } else { ei->i_next_section_block = 0; ei->i_next_section_offset = 0; inode->i_size = isonum_733(de->size); } /* * Some dipshit decided to store some other bit of information * in the high byte of the file length. Truncate size in case * this CDROM was mounted with the cruft option. */ if (sbi->s_cruft) inode->i_size &= 0x00ffffff; if (de->interleave[0]) { printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n"); inode->i_size = 0; } /* I have no idea what file_unit_size is used for, so we will flag it for now */ if (de->file_unit_size[0] != 0) { printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n", inode->i_ino); } /* I have no idea what other flag bits are used for, so we will flag it for now */ #ifdef DEBUG if((de->flags[-high_sierra] & ~2)!= 0){ printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file " "(%ld %x).\n", inode->i_ino, de->flags[-high_sierra]); } #endif inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime(inode, iso_date(de->date, high_sierra), 0))); ei->i_first_extent = (isonum_733(de->extent) + isonum_711(de->ext_attr_length)); /* Set the number of blocks for stat() - should be done before RR */ inode->i_blocks = (inode->i_size + 511) >> 9; /* * Now test for possible Rock Ridge extensions which will override * some of these numbers in the inode structure. */ if (!high_sierra) { parse_rock_ridge_inode(de, inode, relocated); /* if we want uid/gid set, override the rock ridge setting */ if (sbi->s_uid_set) inode->i_uid = sbi->s_uid; if (sbi->s_gid_set) inode->i_gid = sbi->s_gid; } /* Now set final access rights if overriding rock ridge setting */ if (S_ISDIR(inode->i_mode) && sbi->s_overriderockperm && sbi->s_dmode != ISOFS_INVALID_MODE) inode->i_mode = S_IFDIR | sbi->s_dmode; if (S_ISREG(inode->i_mode) && sbi->s_overriderockperm && sbi->s_fmode != ISOFS_INVALID_MODE) inode->i_mode = S_IFREG | sbi->s_fmode; /* Install the inode operations vector */ if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; switch (ei->i_file_format) { #ifdef CONFIG_ZISOFS case isofs_file_compressed: inode->i_data.a_ops = &zisofs_aops; break; #endif default: inode->i_data.a_ops = &isofs_aops; break; } } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &isofs_dir_inode_operations; inode->i_fop = &isofs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_data.a_ops = &isofs_symlink_aops; } else /* XXX - parse_rock_ridge_inode() had already set i_rdev. 
*/ init_special_inode(inode, inode->i_mode, inode->i_rdev); ret = 0; out: kfree(tmpde); brelse(bh); return ret; out_badread: printk(KERN_WARNING "ISOFS: unable to read i-node block\n"); fail: goto out; } struct isofs_iget5_callback_data { unsigned long block; unsigned long offset; }; static int isofs_iget5_test(struct inode *ino, void *data) { struct iso_inode_info *i = ISOFS_I(ino); struct isofs_iget5_callback_data *d = (struct isofs_iget5_callback_data*)data; return (i->i_iget5_block == d->block) && (i->i_iget5_offset == d->offset); } static int isofs_iget5_set(struct inode *ino, void *data) { struct iso_inode_info *i = ISOFS_I(ino); struct isofs_iget5_callback_data *d = (struct isofs_iget5_callback_data*)data; i->i_iget5_block = d->block; i->i_iget5_offset = d->offset; return 0; } /* Store, in the inode's containing structure, the block and block * offset that point to the underlying meta-data for the inode. The * code below is otherwise similar to the iget() code in * include/linux/fs.h */ struct inode *__isofs_iget(struct super_block *sb, unsigned long block, unsigned long offset, int relocated) { unsigned long hashval; struct inode *inode; struct isofs_iget5_callback_data data; long ret; if (offset >= 1ul << sb->s_blocksize_bits) return ERR_PTR(-EINVAL); data.block = block; data.offset = offset; hashval = (block << sb->s_blocksize_bits) | offset; inode = iget5_locked(sb, hashval, &isofs_iget5_test, &isofs_iget5_set, &data); if (!inode) return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { ret = isofs_read_inode(inode, relocated); if (ret < 0) { iget_failed(inode); inode = ERR_PTR(ret); } else { unlock_new_inode(inode); } } return inode; } static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } static struct file_system_type iso9660_fs_type = { .owner = THIS_MODULE, .name = "iso9660", .mount = isofs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("iso9660"); MODULE_ALIAS("iso9660"); static int __init init_iso9660_fs(void) { int err = init_inodecache(); if (err) goto out; #ifdef CONFIG_ZISOFS err = zisofs_init(); if (err) goto out1; #endif err = register_filesystem(&iso9660_fs_type); if (err) goto out2; return 0; out2: #ifdef CONFIG_ZISOFS zisofs_cleanup(); out1: #endif destroy_inodecache(); out: return err; } static void __exit exit_iso9660_fs(void) { unregister_filesystem(&iso9660_fs_type); #ifdef CONFIG_ZISOFS zisofs_cleanup(); #endif destroy_inodecache(); } module_init(init_iso9660_fs) module_exit(exit_iso9660_fs) MODULE_LICENSE("GPL"); |
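/*
 * Illustrative sketch only (not part of fs/isofs/inode.c above): how the
 * mount options handled by parse_options()/isofs_fill_super() above reach
 * the filesystem from userspace through mount(2). The device and mount
 * point paths are invented for this sketch; isofs rejects read-write
 * mounts, so MS_RDONLY is mandatory.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* Each option travels in the comma-separated data string that
         * parse_options() tokenizes against the tokens[] table above. */
        const char *opts = "nojoliet,check=relaxed,map=normal,block=2048";

        if (mount("/dev/sr0", "/mnt/cdrom", "iso9660", MS_RDONLY, opts)) {
                perror("mount");
                return 1;
        }
        return 0;
}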
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	connector.c
 *
 * 2004+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
 * All rights reserved.
 */

#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <linux/moduleparam.h>
#include <linux/connector.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>

#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector.");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_CONNECTOR);

static struct cn_dev cdev;

static int cn_already_initialized;

/*
 * Sends mult (multiple) cn_msg at a time.
 *
 * msg->seq and msg->ack are used to determine message genealogy.
 * When someone sends a message it puts there a locally unique sequence
 * number and a random acknowledge number. The sequence number may be
 * copied into nlmsghdr->nlmsg_seq too.
 *
 * The sequence number is incremented with each message to be sent.
 *
 * If we expect a reply to our message then the sequence number in the
 * received message MUST be the same as in the original message, and
 * the acknowledge number MUST be the same + 1.
 *
 * If we receive a message and its sequence number is not equal to the
 * one we are expecting, then it is a new message.
 *
 * If we receive a message and its sequence number is the same as the one
 * we are expecting, but its acknowledgement number is not equal to
 * the acknowledgement number in the original message + 1, then it is
 * a new message.
 *
 * If msg->len != len, then additional cn_msg messages are expected following
 * the first msg.
 *
 * The message is sent to the portid if given, to the group if given, to
 * both if both are given, or, if both are zero, the group is looked up
 * from the registered callbacks and the message is sent there.
*/ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, gfp_t gfp_mask, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) { struct cn_callback_entry *__cbq; unsigned int size; struct sk_buff *skb; struct nlmsghdr *nlh; struct cn_msg *data; struct cn_dev *dev = &cdev; u32 group = 0; int found = 0; if (portid || __group) { group = __group; } else { spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { if (cn_cb_equal(&__cbq->id.id, &msg->id)) { found = 1; group = __cbq->group; break; } } spin_unlock_bh(&dev->cbdev->queue_lock); if (!found) return -ENODEV; } if (!portid && !netlink_has_listeners(dev->nls, group)) return -ESRCH; size = sizeof(*msg) + len; skb = nlmsg_new(size, gfp_mask); if (!skb) return -ENOMEM; nlh = nlmsg_put(skb, 0, msg->seq, NLMSG_DONE, size, 0); if (!nlh) { kfree_skb(skb); return -EMSGSIZE; } data = nlmsg_data(nlh); memcpy(data, msg, size); NETLINK_CB(skb).dst_group = group; if (group) return netlink_broadcast_filtered(dev->nls, skb, portid, group, gfp_mask, filter, (void *)filter_data); return netlink_unicast(dev->nls, skb, portid, !gfpflags_allow_blocking(gfp_mask)); } EXPORT_SYMBOL_GPL(cn_netlink_send_mult); /* same as cn_netlink_send_mult except msg->len is used for len */ int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, gfp_t gfp_mask) { return cn_netlink_send_mult(msg, msg->len, portid, __group, gfp_mask, NULL, NULL); } EXPORT_SYMBOL_GPL(cn_netlink_send); /* * Callback helper - queues work and setup destructor for given data. */ static int cn_call_callback(struct sk_buff *skb) { struct nlmsghdr *nlh; struct cn_callback_entry *i, *cbq = NULL; struct cn_dev *dev = &cdev; struct cn_msg *msg = nlmsg_data(nlmsg_hdr(skb)); struct netlink_skb_parms *nsp = &NETLINK_CB(skb); int err = -ENODEV; /* verify msg->len is within skb */ nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < NLMSG_HDRLEN + sizeof(struct cn_msg) + msg->len) return -EINVAL; spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(i, &dev->cbdev->queue_list, callback_entry) { if (cn_cb_equal(&i->id.id, &msg->id)) { refcount_inc(&i->refcnt); cbq = i; break; } } spin_unlock_bh(&dev->cbdev->queue_lock); if (cbq != NULL) { cbq->callback(msg, nsp); kfree_skb(skb); cn_queue_release_callback(cbq); err = 0; } return err; } /* * Allow non-root access for NETLINK_CONNECTOR family having CN_IDX_PROC * multicast group. */ static int cn_bind(struct net *net, int group) { unsigned long groups = (unsigned long) group; if (ns_capable(net->user_ns, CAP_NET_ADMIN)) return 0; if (test_bit(CN_IDX_PROC - 1, &groups)) return 0; return -EPERM; } static void cn_release(struct sock *sk, unsigned long *groups) { if (groups && test_bit(CN_IDX_PROC - 1, groups)) { kfree(sk->sk_user_data); sk->sk_user_data = NULL; } } /* * Main netlink receiving function. * * It checks skb, netlink header and msg sizes, and calls callback helper. */ static void cn_rx_skb(struct sk_buff *skb) { struct nlmsghdr *nlh; int len, err; if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); len = nlmsg_len(nlh); if (len < (int)sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || len > CONNECTOR_MAX_MSG_SIZE) return; err = cn_call_callback(skb_get(skb)); if (err < 0) kfree_skb(skb); } } /* * Callback add routing - adds callback with given ID and name. * If there is registered callback with the same ID it will not be added. * * May sleep. 
*/ int cn_add_callback(const struct cb_id *id, const char *name, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)) { struct cn_dev *dev = &cdev; if (!cn_already_initialized) return -EAGAIN; return cn_queue_add_callback(dev->cbdev, name, id, callback); } EXPORT_SYMBOL_GPL(cn_add_callback); /* * Callback remove routing - removes callback * with given ID. * If there is no registered callback with given * ID nothing happens. * * May sleep while waiting for reference counter to become zero. */ void cn_del_callback(const struct cb_id *id) { struct cn_dev *dev = &cdev; cn_queue_del_callback(dev->cbdev, id); } EXPORT_SYMBOL_GPL(cn_del_callback); static int __maybe_unused cn_proc_show(struct seq_file *m, void *v) { struct cn_queue_dev *dev = cdev.cbdev; struct cn_callback_entry *cbq; seq_printf(m, "Name ID\n"); spin_lock_bh(&dev->queue_lock); list_for_each_entry(cbq, &dev->queue_list, callback_entry) { seq_printf(m, "%-15s %u:%u\n", cbq->id.name, cbq->id.id.idx, cbq->id.id.val); } spin_unlock_bh(&dev->queue_lock); return 0; } static int cn_init(void) { struct cn_dev *dev = &cdev; struct netlink_kernel_cfg cfg = { .groups = CN_NETLINK_USERS + 0xf, .input = cn_rx_skb, .flags = NL_CFG_F_NONROOT_RECV, .bind = cn_bind, .release = cn_release, }; dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg); if (!dev->nls) return -EIO; dev->cbdev = cn_queue_alloc_dev("cqueue", dev->nls); if (!dev->cbdev) { netlink_kernel_release(dev->nls); return -EINVAL; } cn_already_initialized = 1; proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show); return 0; } static void cn_fini(void) { struct cn_dev *dev = &cdev; cn_already_initialized = 0; remove_proc_entry("connector", init_net.proc_net); cn_queue_free_dev(dev->cbdev); netlink_kernel_release(dev->nls); } subsys_initcall(cn_init); module_exit(cn_fini); |
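/*
 * Illustrative sketch only (not part of connector.c above): a minimal
 * in-kernel user of the connector API documented above, registering a
 * callback with cn_add_callback() and answering each received cn_msg
 * with cn_netlink_send(). The idx/val pair and the "cn_example" name
 * are invented for this sketch; a real user must pick an unused id
 * within the group range configured in cn_init().
 */
#include <linux/module.h>
#include <linux/connector.h>
#include <linux/slab.h>
#include <linux/string.h>

static const struct cb_id cn_example_id = {
        .idx = CN_NETLINK_USERS + 3,    /* made-up index for the sketch */
        .val = 0x1,
};

static void cn_example_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
        struct cn_msg *reply;

        /* Echo the payload back; seq/ack follow the genealogy rules above. */
        reply = kzalloc(sizeof(*reply) + msg->len, GFP_ATOMIC);
        if (!reply)
                return;

        reply->id = msg->id;
        reply->seq = msg->seq;
        reply->ack = msg->ack + 1;
        reply->len = msg->len;
        memcpy(reply->data, msg->data, msg->len);

        /* portid == 0 and group == 0: the connector looks up the group
         * registered for this id and broadcasts the reply there. */
        cn_netlink_send(reply, 0, 0, GFP_ATOMIC);
        kfree(reply);
}

static int __init cn_example_init(void)
{
        return cn_add_callback(&cn_example_id, "cn_example", cn_example_callback);
}

static void __exit cn_example_exit(void)
{
        cn_del_callback(&cn_example_id);
}

module_init(cn_example_init);
module_exit(cn_example_exit);
MODULE_LICENSE("GPL");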
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SWAP_H
#define _LINUX_SWAP_H

#include <linux/spinlock.h>
#include <linux/linkage.h>
#include <linux/mmzone.h>
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
#include <linux/node.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/atomic.h>
#include <linux/page-flags.h>
#include <uapi/linux/mempolicy.h>
#include <asm/page.h>

struct notifier_block;

struct bio;

struct pagevec;

#define SWAP_FLAG_PREFER        0x8000  /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK     0x7fff
#define SWAP_FLAG_PRIO_SHIFT    0
#define SWAP_FLAG_DISCARD       0x10000 /* enable discard for swap */
#define SWAP_FLAG_DISCARD_ONCE  0x20000 /* discard swap area at swapon-time */
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */

#define SWAP_FLAGS_VALID        (SWAP_FLAG_PRIO_MASK \
| SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) #define SWAP_BATCH 64 static inline int current_is_kswapd(void) { return current->flags & PF_KSWAPD; } /* * MAX_SWAPFILES defines the maximum number of swaptypes: things which can * be swapped to. The swap type and the offset into that swap type are * encoded into pte's and into pgoff_t's in the swapcache. Using five bits * for the type means that the maximum number of swapcache pages is 27 bits * on 32-bit-pgoff_t architectures. And that assumes that the architecture packs * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 /* * Use some of the swap files numbers for other purposes. This * is a convenient way to hook into the VM to trigger special * actions on faults. */ /* * PTE markers are used to persist information onto PTEs that otherwise * should be a none pte. As its name "PTE" hints, it should only be * applied to the leaves of pgtables. */ #define SWP_PTE_MARKER_NUM 1 #define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ SWP_MIGRATION_NUM + SWP_DEVICE_NUM) /* * Unaddressable device memory support. See include/linux/hmm.h and * Documentation/mm/hmm.rst. Short description is we need struct pages for * device memory that is unaddressable (inaccessible) by CPU, so that we can * migrate part of a process memory to device memory. * * When a page is migrated from CPU to device, we set the CPU page table entry * to a special SWP_DEVICE_{READ|WRITE} entry. * * When a page is mapped by the device for exclusive access we set the CPU page * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. */ #ifdef CONFIG_DEVICE_PRIVATE #define SWP_DEVICE_NUM 4 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) #define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) #define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) #else #define SWP_DEVICE_NUM 0 #endif /* * Page migration support. * * SWP_MIGRATION_READ_EXCLUSIVE is only applicable to anonymous pages and * indicates that the referenced (part of) an anonymous page is exclusive to * a single process. For SWP_MIGRATION_WRITE, that information is implicit: * (part of) an anonymous page that are mapped writable are exclusive to a * single process. */ #ifdef CONFIG_MIGRATION #define SWP_MIGRATION_NUM 3 #define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) #define SWP_MIGRATION_READ_EXCLUSIVE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 2) #else #define SWP_MIGRATION_NUM 0 #endif /* * Handling of hardware poisoned pages with memory corruption. */ #ifdef CONFIG_MEMORY_FAILURE #define SWP_HWPOISON_NUM 1 #define SWP_HWPOISON MAX_SWAPFILES #else #define SWP_HWPOISON_NUM 0 #endif #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ SWP_PTE_MARKER_NUM) /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the * old reserved area - some extra information. Note that the first * kilobyte is reserved for boot loader or disk label stuff... * * Having the magic at the end of the PAGE_SIZE makes detecting swap * areas somewhat tricky on machines that support multiple page sizes. 
* For 2.5 we'll probably want to move the magic to just beyond the * bootbits... */ union swap_header { struct { char reserved[PAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ } magic; struct { char bootbits[1024]; /* Space for disklabel etc. */ __u32 version; __u32 last_page; __u32 nr_badpages; unsigned char sws_uuid[16]; unsigned char sws_volume[16]; __u32 padding[117]; __u32 badpages[1]; } info; }; /* * current->reclaim_state points to one of these when a task is running * memory reclaim */ struct reclaim_state { /* pages reclaimed outside of LRU-based reclaim */ unsigned long reclaimed; #ifdef CONFIG_LRU_GEN /* per-thread mm walk data */ struct lru_gen_mm_walk *mm_walk; #endif }; /* * mm_account_reclaimed_pages(): account reclaimed pages outside of LRU-based * reclaim * @pages: number of pages reclaimed * * If the current process is undergoing a reclaim operation, increment the * number of reclaimed pages by @pages. */ static inline void mm_account_reclaimed_pages(unsigned long pages) { if (current->reclaim_state) current->reclaim_state->reclaimed += pages; } #ifdef __KERNEL__ struct address_space; struct sysinfo; struct writeback_control; struct zone; /* * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of * disk blocks. A rbtree of swap extents maps the entire swapfile (Where the * term `swapfile' refers to either a blockdevice or an IS_REG file). Apart * from setup, they're handled identically. * * We always assume that blocks are of size PAGE_SIZE. */ struct swap_extent { struct rb_node rb_node; pgoff_t start_page; pgoff_t nr_pages; sector_t start_block; }; /* * Max bad pages in the new format.. */ #define MAX_SWAP_BADPAGES \ ((offsetof(union swap_header, magic.magic) - \ offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ /* add others here before... */ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ #define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ /* Special value in first swap_map */ #define SWAP_MAP_MAX 0x3e /* Max count */ #define SWAP_MAP_BAD 0x3f /* Note page is bad */ #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */ /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ /* * We use this to track usage of a cluster. A cluster is a block of swap disk * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. 
* * The data field stores next cluster if the cluster is free or cluster usage * counter otherwise. The flags field determines if a cluster is free. This is * protected by swap_info_struct.lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields * and swap_info_struct->swap_map * elements correspond to the swap * cluster */ unsigned int data:24; unsigned int flags:8; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ #define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from * its own cluster and swapout sequentially. The purpose is to optimize swapout * throughput. */ struct percpu_cluster { struct swap_cluster_info index; /* Current cluster index */ unsigned int next; /* Likely next allocation offset */ }; struct swap_cluster_list { struct swap_cluster_info head; struct swap_cluster_info tail; }; /* * The in-memory structure used to track swap areas. */ struct swap_info_struct { struct percpu_ref users; /* indicate and keep swap device valid. */ unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct swap_cluster_list free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct bdev_handle *bdev_handle;/* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ spinlock_t cont_lock; /* * protect swap count continuation page * list. */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the * array is nr_node_ids, which is not * a fixed value so have to allocate * dynamically. * And it has to be an array so that * plist_for_each_* can work. 
*/ }; static inline swp_entry_t page_swap_entry(struct page *page) { struct folio *folio = page_folio(page); swp_entry_t entry = folio->swap; entry.val += folio_page_idx(folio, page); return entry; } /* linux/mm/workingset.c */ bool workingset_test_recent(void *shadow, bool file, bool *workingset); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; #define mapping_set_update(xas, mapping) do { \ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ xas_set_update(xas, workingset_update_node); \ xas_set_lru(xas, &shadow_nodes); \ } \ } while (0) /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) /* linux/mm/swap.c */ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_io, unsigned int nr_rotated); void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; static inline bool lru_cache_disabled(void) { return atomic_read(&lru_disable_count); } static inline void lru_cache_enable(void) { atomic_dec(&lru_disable_count); } extern void lru_cache_disable(void); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, unsigned int reclaim_options); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; #else #define node_reclaim_mode 0 #endif static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? 
*/ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } void check_move_unevictable_folios(struct folio_batch *fbatch); extern void __meminit kswapd_run(int nid); extern void __meminit kswapd_stop(int nid); #ifdef CONFIG_SWAP int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); static inline unsigned long total_swapcache_pages(void) { return global_node_page_state(NR_SWAPCACHE); } extern void free_swap_cache(struct page *page); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages; } static inline long get_nr_swap_pages(void) { return atomic_long_read(&nr_swap_pages); } extern void si_swapinfo(struct sysinfo *); swp_entry_t folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { percpu_ref_put(&si->users); } #else /* CONFIG_SWAP */ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return NULL; } static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) { return NULL; } static inline void put_swap_device(struct swap_info_struct *si) { } #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL #define vm_swap_full() 0 #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file * so leave put_page and release_pages undeclared... 
*/ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) static inline void free_swap_cache(struct page *page) { } static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { return 0; } static inline void swap_shmem_alloc(swp_entry_t swp) { } static inline int swap_duplicate(swp_entry_t swp) { return 0; } static inline void swap_free(swp_entry_t swp) { } static inline void put_swap_folio(struct folio *folio, swp_entry_t swp) { } static inline int __swap_count(swp_entry_t entry) { return 0; } static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) { return 0; } static inline int swp_swapcount(swp_entry_t entry) { return 0; } static inline swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; entry.val = 0; return entry; } static inline bool folio_free_swap(struct folio *folio) { return false; } static inline int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) { return -EINVAL; } #endif /* CONFIG_SWAP */ #ifdef CONFIG_THP_SWAP extern int split_swap_cluster(swp_entry_t entry); #else static inline int split_swap_cluster(swp_entry_t entry) { return 0; } #endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* Cgroup2 doesn't have per-cgroup swappiness */ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return READ_ONCE(vm_swappiness); /* root ? */ if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) return READ_ONCE(vm_swappiness); return READ_ONCE(memcg->swappiness); } #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { return READ_ONCE(vm_swappiness); } #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp); static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { if (mem_cgroup_disabled()) return; __folio_throttle_swaprate(folio, gfp); } #else static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { } #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { if (mem_cgroup_disabled()) return 0; return __mem_cgroup_try_charge_swap(folio, entry); } extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { if (mem_cgroup_disabled()) return; __mem_cgroup_uncharge_swap(entry, nr_pages); } extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { } static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { return 0; } static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { } static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { return get_nr_swap_pages(); } static inline bool mem_cgroup_swap_full(struct folio *folio) { return vm_swap_full(); } #endif #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ |
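/*
 * The MAX_SWAP_BADPAGES arithmetic above can be checked by hand: the
 * bad-page list has to fit between the end of the info header (bootbits,
 * version, last_page, nr_badpages, uuid, volume label and padding) and the
 * 10-byte magic at the end of the first page. The standalone userspace
 * sketch below reproduces the layout and prints the result. It is an
 * illustration only, not kernel code: it assumes 4 KiB pages and
 * substitutes uint32_t for __u32.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096			/* assumed page size */

union swap_header_sketch {		/* mirrors union swap_header above */
	struct {
		char reserved[PAGE_SIZE - 10];
		char magic[10];		/* SWAP-SPACE or SWAPSPACE2 */
	} magic;
	struct {
		char		bootbits[1024];	/* space for disklabel etc. */
		uint32_t	version;
		uint32_t	last_page;
		uint32_t	nr_badpages;
		unsigned char	sws_uuid[16];
		unsigned char	sws_volume[16];
		uint32_t	padding[117];
		uint32_t	badpages[1];
	} info;
};

#define MAX_SWAP_BADPAGES_SKETCH \
	((offsetof(union swap_header_sketch, magic.magic) - \
	  offsetof(union swap_header_sketch, info.badpages)) / sizeof(int))

int main(void)
{
	/* With 4 KiB pages: magic at 4086, badpages at 1536, so 637 slots. */
	printf("magic.magic offset  : %zu\n",
	       offsetof(union swap_header_sketch, magic.magic));
	printf("info.badpages offset: %zu\n",
	       offsetof(union swap_header_sketch, info.badpages));
	printf("MAX_SWAP_BADPAGES   : %zu\n", (size_t)MAX_SWAP_BADPAGES_SKETCH);
	return 0;
}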
// SPDX-License-Identifier: GPL-2.0
#include <linux/quotaops.h>
#include <linux/uuid.h>

#include "ext4.h"
#include "xattr.h"
#include "ext4_jbd2.h"

static void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
					 const struct fscrypt_name *src)
{
	memset(dst, 0, sizeof(*dst));
	dst->usr_fname = src->usr_fname;
	dst->disk_name = src->disk_name;
	dst->hinfo.hash = src->hash;
	dst->hinfo.minor_hash = src->minor_hash;
	dst->crypto_buf = src->crypto_buf;
}

int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
			      int lookup, struct ext4_filename *fname)
{
	struct fscrypt_name name;
	int err;

	err = fscrypt_setup_filename(dir, iname, lookup, &name);
	if (err)
		return err;

	ext4_fname_from_fscrypt_name(fname, &name);

#if IS_ENABLED(CONFIG_UNICODE)
	err = ext4_fname_setup_ci_filename(dir, iname, fname);
	if (err)
		ext4_fname_free_filename(fname);
#endif
	return err;
}

int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
			      struct ext4_filename *fname)
{
	struct fscrypt_name name;
	int err;

	err = fscrypt_prepare_lookup(dir, dentry, &name);
	if (err)
		return err;

	ext4_fname_from_fscrypt_name(fname, &name);

#if IS_ENABLED(CONFIG_UNICODE)
	err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
	if (err)
		ext4_fname_free_filename(fname);
#endif
	return err;
}

void ext4_fname_free_filename(struct ext4_filename *fname)
{
	struct fscrypt_name name;

	name.crypto_buf = fname->crypto_buf;
	fscrypt_free_filename(&name);

	fname->crypto_buf.name = NULL;
	fname->usr_fname = NULL;
	fname->disk_name.name = NULL;

#if IS_ENABLED(CONFIG_UNICODE)
	kfree(fname->cf_name.name);
	fname->cf_name.name = NULL;
#endif
}

static bool uuid_is_zero(__u8 u[16])
{
	int i;

	for (i = 0; i < 16; i++)
		if (u[i])
			return false;
	return true;
}

int ext4_ioctl_get_encryption_pwsalt(struct file *filp, void __user *arg)
{
	struct super_block *sb = file_inode(filp)->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err, err2;
	handle_t *handle;

	if (!ext4_has_feature_encrypt(sb))
		return -EOPNOTSUPP;

	if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {
		err = mnt_want_write_file(filp);
		if (err)
			return err;
		handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto pwsalt_err_exit;
		}
		err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
						    EXT4_JTR_NONE);
		if (err)
			goto pwsalt_err_journal;
		lock_buffer(sbi->s_sbh);
		generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
		ext4_superblock_csum_set(sb);
		unlock_buffer(sbi->s_sbh);
		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
pwsalt_err_journal:
		err2 = ext4_journal_stop(handle);
		if (err2 && !err)
			err = err2;
pwsalt_err_exit:
mnt_drop_write_file(filp); if (err) return err; } if (copy_to_user(arg, sbi->s_es->s_encrypt_pw_salt, 16)) return -EFAULT; return 0; } static int ext4_get_context(struct inode *inode, void *ctx, size_t len) { return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); } static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { handle_t *handle = fs_data; int res, res2, credits, retries = 0; /* * Encrypting the root directory is not allowed because e2fsck expects * lost+found to exist and be unencrypted, and encrypting the root * directory would imply encrypting the lost+found directory as well as * the filename "lost+found" itself. */ if (inode->i_ino == EXT4_ROOT_INO) return -EPERM; if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) return -EINVAL; if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) return -EOPNOTSUPP; res = ext4_convert_inline_data(inode); if (res) return res; /* * If a journal handle was specified, then the encryption context is * being set on a new inode via inheritance and is part of a larger * transaction to create the inode. Otherwise the encryption context is * being set on an existing inode in its own transaction. Only in the * latter case should the "retry on ENOSPC" logic be used. */ if (handle) { res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); /* * Update inode->i_flags - S_ENCRYPTED will be enabled, * S_DAX may be disabled */ ext4_set_inode_flags(inode, false); } return res; } res = dquot_initialize(inode); if (res) return res; retry: res = ext4_xattr_set_credits(inode, len, false /* is_create */, &credits); if (res) return res; handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); if (IS_ERR(handle)) return PTR_ERR(handle); res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, 0); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); /* * Update inode->i_flags - S_ENCRYPTED will be enabled, * S_DAX may be disabled */ ext4_set_inode_flags(inode, false); res = ext4_mark_inode_dirty(handle, inode); if (res) EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); } res2 = ext4_journal_stop(handle); if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; if (!res) res = res2; return res; } static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) { return EXT4_SB(sb)->s_dummy_enc_policy.policy; } static bool ext4_has_stable_inodes(struct super_block *sb) { return ext4_has_feature_stable_inodes(sb); } const struct fscrypt_operations ext4_cryptops = { .needs_bounce_pages = 1, .has_32bit_inodes = 1, .supports_subblock_data_units = 1, .legacy_key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, .get_dummy_policy = ext4_get_dummy_policy, .empty_dir = ext4_empty_dir, .has_stable_inodes = ext4_has_stable_inodes, }; |
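/*
 * ext4_ioctl_get_encryption_pwsalt() above services the get-pwsalt ioctl
 * (EXT4_IOC_GET_ENCRYPTION_PWSALT, which ext4 defines as the generic
 * FS_IOC_GET_ENCRYPTION_PWSALT), generating and persisting a 16-byte salt
 * in the superblock on first use. The userspace sketch below is a hedged
 * usage example, not part of the kernel sources; it assumes the uapi ioctl
 * definition and falls back to the matching _IOW('f', 20, __u8[16])
 * encoding if the header in use does not provide it.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

#ifndef FS_IOC_GET_ENCRYPTION_PWSALT
#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
#endif

int main(int argc, char **argv)
{
	unsigned char salt[16];
	int fd, i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path on an ext4 filesystem>\n",
			argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* The kernel generates the salt if it is still all-zero. */
	if (ioctl(fd, FS_IOC_GET_ENCRYPTION_PWSALT, salt) != 0) {
		perror("FS_IOC_GET_ENCRYPTION_PWSALT");
		close(fd);
		return 1;
	}

	for (i = 0; i < 16; i++)
		printf("%02x", salt[i]);
	printf("\n");

	close(fd);
	return 0;
}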
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Squashfs - a compressed read only filesystem for Linux
 *
 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
 * Phillip Lougher <phillip@squashfs.org.uk>
 *
 * namei.c
 */

/*
 * This file implements code to do filename lookup in directories.
 *
 * Like inodes, directories are packed into compressed metadata blocks, stored
 * in a directory table. Directories are accessed using the start address of
 * the metablock containing the directory and the offset into the
 * decompressed block (<block, offset>).
 *
 * Directories are organised in a slightly complex way, and are not simply
 * a list of file names. The organisation takes advantage of the
 * fact that (in most cases) the inodes of the files will be in the same
 * compressed metadata block, and therefore, can share the start block.
 * Directories are therefore organised in a two level list, a directory
 * header containing the shared start block value, and a sequence of directory
 * entries, each of which share the shared start block. A new directory header
 * is written once/if the inode start block changes. The directory
 * header/directory entry list is repeated as many times as necessary.
 *
 * Directories are sorted, and can contain a directory index to speed up
 * file lookup. Directory indexes store one entry per metablock, each entry
 * storing the index/filename mapping to the first directory header
 * in each metadata block. Directories are sorted in alphabetical order,
 * and at lookup the index is scanned linearly looking for the first filename
 * alphabetically larger than the filename being looked up. At this point the
 * location of the metadata block the filename is in has been found.
 * The general idea of the index is to ensure only one metadata block needs to
 * be decompressed to do a lookup irrespective of the length of the directory.
 * This scheme has the advantage that it doesn't require extra memory overhead
 * and doesn't require much extra storage on disk.
 */

#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/dcache.h>
#include <linux/xattr.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "xattr.h"

/*
 * Lookup name in the directory index, returning the location of the metadata
 * block containing it, and the directory index this represents.
 *
 * If we get an error reading the index then return the part of the index
 * (if any) we have managed to read - the index isn't essential, just
 * quicker.
*/ static int get_dir_index_using_name(struct super_block *sb, u64 *next_block, int *next_offset, u64 index_start, int index_offset, int i_count, const char *name, int len) { struct squashfs_sb_info *msblk = sb->s_fs_info; int i, length = 0, err; unsigned int size; struct squashfs_dir_index *index; char *str; TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN * 2 + 2, GFP_KERNEL); if (index == NULL) { ERROR("Failed to allocate squashfs_dir_index\n"); goto out; } str = &index->name[SQUASHFS_NAME_LEN + 1]; strncpy(str, name, len); str[len] = '\0'; for (i = 0; i < i_count; i++) { err = squashfs_read_metadata(sb, index, &index_start, &index_offset, sizeof(*index)); if (err < 0) break; size = le32_to_cpu(index->size) + 1; if (size > SQUASHFS_NAME_LEN) break; err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); if (err < 0) break; index->name[size] = '\0'; if (strcmp(index->name, str) > 0) break; length = le32_to_cpu(index->index); *next_block = le32_to_cpu(index->start_block) + msblk->directory_table; } *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; kfree(index); out: /* * Return index (f_pos) of the looked up metadata block. Translate * from internal f_pos to external f_pos which is offset by 3 because * we invent "." and ".." entries which are not actually stored in the * directory. */ return length + 3; } static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { const unsigned char *name = dentry->d_name.name; int len = dentry->d_name.len; struct inode *inode = NULL; struct squashfs_sb_info *msblk = dir->i_sb->s_fs_info; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; u64 block = squashfs_i(dir)->start + msblk->directory_table; int offset = squashfs_i(dir)->offset; int err, length; unsigned int dir_count, size; TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL); if (dire == NULL) { ERROR("Failed to allocate squashfs_dir_entry\n"); return ERR_PTR(-ENOMEM); } if (len > SQUASHFS_NAME_LEN) { err = -ENAMETOOLONG; goto failed; } length = get_dir_index_using_name(dir->i_sb, &block, &offset, squashfs_i(dir)->dir_idx_start, squashfs_i(dir)->dir_idx_offset, squashfs_i(dir)->dir_idx_cnt, name, len); while (length < i_size_read(dir)) { /* * Read directory header. */ err = squashfs_read_metadata(dir->i_sb, &dirh, &block, &offset, sizeof(dirh)); if (err < 0) goto read_failure; length += sizeof(dirh); dir_count = le32_to_cpu(dirh.count) + 1; if (dir_count > SQUASHFS_DIR_COUNT) goto data_error; while (dir_count--) { /* * Read directory entry. 
*/ err = squashfs_read_metadata(dir->i_sb, dire, &block, &offset, sizeof(*dire)); if (err < 0) goto read_failure; size = le16_to_cpu(dire->size) + 1; /* size should never be larger than SQUASHFS_NAME_LEN */ if (size > SQUASHFS_NAME_LEN) goto data_error; err = squashfs_read_metadata(dir->i_sb, dire->name, &block, &offset, size); if (err < 0) goto read_failure; length += sizeof(*dire) + size; if (name[0] < dire->name[0]) goto exit_lookup; if (len == size && !strncmp(name, dire->name, len)) { unsigned int blk, off, ino_num; long long ino; blk = le32_to_cpu(dirh.start_block); off = le16_to_cpu(dire->offset); ino_num = le32_to_cpu(dirh.inode_number) + (short) le16_to_cpu(dire->inode_number); ino = SQUASHFS_MKINODE(blk, off); TRACE("calling squashfs_iget for directory " "entry %s, inode %x:%x, %d\n", name, blk, off, ino_num); inode = squashfs_iget(dir->i_sb, ino, ino_num); goto exit_lookup; } } } exit_lookup: kfree(dire); return d_splice_alias(inode, dentry); data_error: err = -EIO; read_failure: ERROR("Unable to read directory block [%llx:%x]\n", squashfs_i(dir)->start + msblk->directory_table, squashfs_i(dir)->offset); failed: kfree(dire); return ERR_PTR(err); } const struct inode_operations squashfs_dir_inode_ops = { .lookup = squashfs_lookup, .listxattr = squashfs_listxattr }; |
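/*
 * The index scan described in the header comment above boils down to: walk
 * the alphabetically sorted index entries, stop at the first entry whose
 * leading filename compares greater than the target, and remember the last
 * block seen before that. The standalone sketch below illustrates the idea
 * with a toy in-memory index; it is not kernel code, and the names, types
 * and sample data are invented for illustration.
 */
#include <stdio.h>
#include <string.h>

struct toy_dir_index {
	const char *first_name;	/* first filename in the metadata block */
	unsigned long block;	/* location of that metadata block */
	unsigned int index;	/* directory offset of the block's first header */
};

static unsigned int toy_lookup_block(const struct toy_dir_index *idx,
				     int i_count, const char *name,
				     unsigned long *block)
{
	unsigned int length = 0;
	int i;

	for (i = 0; i < i_count; i++) {
		if (strcmp(idx[i].first_name, name) > 0)
			break;
		length = idx[i].index;
		*block = idx[i].block;
	}
	/* Mirror the "+ 3" translation for the invented "." and ".." entries. */
	return length + 3;
}

int main(void)
{
	static const struct toy_dir_index idx[] = {
		{ "aardvark", 0x0000, 0 },
		{ "kernel",   0x2000, 8192 },
		{ "squash",   0x4000, 16384 },
	};
	unsigned long block = 0x0000;
	unsigned int f_pos;

	f_pos = toy_lookup_block(idx, 3, "namei.c", &block);
	printf("lookup of \"namei.c\": scan block 0x%lx from f_pos %u\n",
	       block, f_pos);
	return 0;
}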
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 *
 * Trivial changes by Alan Cox to add the LFS fixes
 *
 * Trivial Changes:
 * Rights granted to Hans Reiser to redistribute under other terms providing
 * he accepts all liability including but not limited to patent, fitness
 * for purpose, and direct or indirect claims arising from failure to perform.
* * NO WARRANTY */ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/uaccess.h> #include "reiserfs.h" #include "acl.h" #include "xattr.h" #include <linux/init.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/quotaops.h> #include <linux/vfs.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/crc32.h> #include <linux/seq_file.h> struct file_system_type reiserfs_fs_type; static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING; static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; int is_reiserfs_3_5(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string, strlen(reiserfs_3_5_magic_string)); } int is_reiserfs_3_6(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string, strlen(reiserfs_3_6_magic_string)); } int is_reiserfs_jr(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string, strlen(reiserfs_jr_magic_string)); } static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) { return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) || is_reiserfs_jr(rs)); } static int reiserfs_remount(struct super_block *s, int *flags, char *data); static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) { struct reiserfs_transaction_handle th; /* * Writeback quota in non-journalled quota case - journalled quota has * no dirty dquots */ dquot_writeback_dquots(s, -1); reiserfs_write_lock(s); if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th)) reiserfs_flush_old_commits(s); reiserfs_write_unlock(s); return 0; } static void flush_old_commits(struct work_struct *work) { struct reiserfs_sb_info *sbi; struct super_block *s; sbi = container_of(work, struct reiserfs_sb_info, old_work.work); s = sbi->s_journal->j_work_sb; /* * We need s_umount for protecting quota writeback. We have to use * trylock as reiserfs_cancel_old_flush() may be waiting for this work * to complete with s_umount held. */ if (!down_read_trylock(&s->s_umount)) { /* Requeue work if we are not cancelling it */ spin_lock(&sbi->old_work_lock); if (sbi->work_queued == 1) queue_delayed_work(system_long_wq, &sbi->old_work, HZ); spin_unlock(&sbi->old_work_lock); return; } spin_lock(&sbi->old_work_lock); /* Avoid clobbering the cancel state... */ if (sbi->work_queued == 1) sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); up_read(&s->s_umount); } void reiserfs_schedule_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); unsigned long delay; /* * Avoid scheduling flush when sb is being shut down. It can race * with journal shutdown and free still queued delayed work. 
*/ if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE)) return; spin_lock(&sbi->old_work_lock); if (!sbi->work_queued) { delay = msecs_to_jiffies(dirty_writeback_interval * 10); queue_delayed_work(system_long_wq, &sbi->old_work, delay); sbi->work_queued = 1; } spin_unlock(&sbi->old_work_lock); } void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); spin_lock(&sbi->old_work_lock); /* Make sure no new flushes will be queued */ sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!sb_rdonly(s)) { int err = journal_begin(&th, s, 1); if (err) { reiserfs_block_writes(&th); } else { reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); reiserfs_block_writes(&th); journal_end_sync(&th); } } reiserfs_write_unlock(s); return 0; } static int reiserfs_unfreeze(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); reiserfs_allow_writes(s); spin_lock(&sbi->old_work_lock); /* Allow old_work to run again */ sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); return 0; } extern const struct in_core_key MAX_IN_CORE_KEY; /* * this is used to delete "save link" when there are no items of a * file it points to. It can either happen if unlink is completed but * "save unlink" removal, or if file has both unlink and truncate * pending and as unlink completes first (because key of "save link" * protecting unlink is bigger that a key lf "save link" which * protects truncate), so there left no items to make truncate * completion on */ static int remove_save_link_only(struct super_block *s, struct reiserfs_key *key, int oid_free) { struct reiserfs_transaction_handle th; int err; /* we are going to do one balancing */ err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT); if (err) return err; reiserfs_delete_solid_item(&th, NULL, key); if (oid_free) /* removals are protected by direct items */ reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); return journal_end(&th); } #ifdef CONFIG_QUOTA static int reiserfs_quota_on_mount(struct super_block *, int); #endif /* * Look for uncompleted unlinks and truncates and complete them * * Called with superblock write locked. If quotas are enabled, we have to * release/retake lest we call dquot_quota_on_mount(), proceed to * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per * cpu worklets to complete flush_async_commits() that in turn wait for the * superblock write lock. 
*/ static int finish_unfinished(struct super_block *s) { INITIALIZE_PATH(path); struct cpu_key max_cpu_key, obj_key; struct reiserfs_key save_link_key, last_inode_key; int retval = 0; struct item_head *ih; struct buffer_head *bh; int item_pos; char *item; int done; struct inode *inode; int truncate; #ifdef CONFIG_QUOTA int i; int ms_active_set; int quota_enabled[REISERFS_MAXQUOTAS]; #endif /* compose key to look for "save" links */ max_cpu_key.version = KEY_FORMAT_3_5; max_cpu_key.on_disk_key.k_dir_id = ~0U; max_cpu_key.on_disk_key.k_objectid = ~0U; set_cpu_key_k_offset(&max_cpu_key, ~0U); max_cpu_key.key_length = 3; memset(&last_inode_key, 0, sizeof(last_inode_key)); #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ if (s->s_flags & SB_ACTIVE) { ms_active_set = 0; } else { ms_active_set = 1; s->s_flags |= SB_ACTIVE; } /* Turn on quotas so that they are updated correctly */ for (i = 0; i < REISERFS_MAXQUOTAS; i++) { quota_enabled[i] = 1; if (REISERFS_SB(s)->s_qf_names[i]) { int ret; if (sb_has_quota_active(s, i)) { quota_enabled[i] = 0; continue; } reiserfs_write_unlock(s); ret = reiserfs_quota_on_mount(s, i); reiserfs_write_lock(s); if (ret < 0) reiserfs_warning(s, "reiserfs-2500", "cannot turn on journaled " "quota: error %d", ret); } } #endif done = 0; REISERFS_SB(s)->s_is_unlinked_ok = 1; while (!retval) { int depth; retval = search_item(s, &max_cpu_key, &path); if (retval != ITEM_NOT_FOUND) { reiserfs_error(s, "vs-2140", "search_by_key returned %d", retval); break; } bh = get_last_bh(&path); item_pos = get_item_pos(&path); if (item_pos != B_NR_ITEMS(bh)) { reiserfs_warning(s, "vs-2060", "wrong position found"); break; } item_pos--; ih = item_head(bh, item_pos); if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) /* there are no "save" links anymore */ break; save_link_key = ih->ih_key; if (is_indirect_le_ih(ih)) truncate = 1; else truncate = 0; /* reiserfs_iget needs k_dirid and k_objectid only */ item = ih_item_body(bh, ih); obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); obj_key.on_disk_key.k_objectid = le32_to_cpu(ih->ih_key.k_objectid); obj_key.on_disk_key.k_offset = 0; obj_key.on_disk_key.k_type = 0; pathrelse(&path); inode = reiserfs_iget(s, &obj_key); if (IS_ERR_OR_NULL(inode)) { /* * the unlink almost completed, it just did not * manage to remove "save" link and release objectid */ reiserfs_warning(s, "vs-2180", "iget failed for %K", &obj_key); retval = remove_save_link_only(s, &save_link_key, 1); continue; } if (!truncate && inode->i_nlink) { /* file is not unlinked */ reiserfs_warning(s, "vs-2185", "file %K is not unlinked", &obj_key); retval = remove_save_link_only(s, &save_link_key, 0); continue; } depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_initialize(inode); reiserfs_write_lock_nested(inode->i_sb, depth); if (truncate && S_ISDIR(inode->i_mode)) { /* * We got a truncate request for a dir which * is impossible. The only imaginable way is to * execute unfinished truncate request then boot * into old kernel, remove the file and create dir * with the same key. */ reiserfs_warning(s, "green-2101", "impossible truncate on a " "directory %k. Please report", INODE_PKEY(inode)); retval = remove_save_link_only(s, &save_link_key, 0); truncate = 0; iput(inode); continue; } if (truncate) { REISERFS_I(inode)->i_flags |= i_link_saved_truncate_mask; /* * not completed truncate found. 
New size was * committed together with "save" link */ reiserfs_info(s, "Truncating %k to %lld ..", INODE_PKEY(inode), inode->i_size); /* don't update modification time */ reiserfs_truncate_file(inode, 0); retval = remove_save_link(inode, truncate); } else { REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; /* not completed unlink (rmdir) found */ reiserfs_info(s, "Removing %k..", INODE_PKEY(inode)); if (memcmp(&last_inode_key, INODE_PKEY(inode), sizeof(last_inode_key))){ last_inode_key = *INODE_PKEY(inode); /* removal gets completed in iput */ retval = 0; } else { reiserfs_warning(s, "super-2189", "Dead loop " "in finish_unfinished " "detected, just remove " "save link\n"); retval = remove_save_link_only(s, &save_link_key, 0); } } iput(inode); printk("done\n"); done++; } REISERFS_SB(s)->s_is_unlinked_ok = 0; #ifdef CONFIG_QUOTA /* Turn quotas off */ reiserfs_write_unlock(s); for (i = 0; i < REISERFS_MAXQUOTAS; i++) { if (sb_dqopt(s)->files[i] && quota_enabled[i]) dquot_quota_off(s, i); } reiserfs_write_lock(s); if (ms_active_set) /* Restore the flag back */ s->s_flags &= ~SB_ACTIVE; #endif pathrelse(&path); if (done) reiserfs_info(s, "There were %d uncompleted unlinks/truncates. " "Completed\n", done); return retval; } /* * to protect file being unlinked from getting lost we "safe" link files * being unlinked. This link will be deleted in the same transaction with last * item of file. mounting the filesystem we scan all these links and remove * files which almost got lost */ void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate) { INITIALIZE_PATH(path); int retval; struct cpu_key key; struct item_head ih; __le32 link; BUG_ON(!th->t_trans_id); /* file can only get one "save link" of each kind */ RFALSE(truncate && (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask), "saved link already exists for truncated inode %lx", (long)inode->i_ino); RFALSE(!truncate && (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask), "saved link already exists for unlinked inode %lx", (long)inode->i_ino); /* setup key of "save" link */ key.version = KEY_FORMAT_3_5; key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; key.on_disk_key.k_objectid = inode->i_ino; if (!truncate) { /* unlink, rmdir, rename */ set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize); set_cpu_key_k_type(&key, TYPE_DIRECT); /* item head of "safe" link */ make_le_item_head(&ih, &key, key.version, 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, 4 /*length */ , 0xffff /*free space */ ); } else { /* truncate */ if (S_ISDIR(inode->i_mode)) reiserfs_warning(inode->i_sb, "green-2102", "Adding a truncate savelink for " "a directory %k! 
Please report", INODE_PKEY(inode)); set_cpu_key_k_offset(&key, 1); set_cpu_key_k_type(&key, TYPE_INDIRECT); /* item head of "safe" link */ make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT, 4 /*length */ , 0 /*free space */ ); } key.key_length = 3; /* look for its place in the tree */ retval = search_item(inode->i_sb, &key, &path); if (retval != ITEM_NOT_FOUND) { if (retval != -ENOSPC) reiserfs_error(inode->i_sb, "vs-2100", "search_by_key (%K) returned %d", &key, retval); pathrelse(&path); return; } /* body of "save" link */ link = INODE_PKEY(inode)->k_dir_id; /* put "save" link into tree, don't charge quota to anyone */ retval = reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); if (retval) { if (retval != -ENOSPC) reiserfs_error(inode->i_sb, "vs-2120", "insert_item returned %d", retval); } else { if (truncate) REISERFS_I(inode)->i_flags |= i_link_saved_truncate_mask; else REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; } } /* this opens transaction unlike add_save_link */ int remove_save_link(struct inode *inode, int truncate) { struct reiserfs_transaction_handle th; struct reiserfs_key key; int err; /* we are going to do one balancing only */ err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); if (err) return err; /* setup key of "save" link */ key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID); key.k_objectid = INODE_PKEY(inode)->k_objectid; if (!truncate) { /* unlink, rmdir, rename */ set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1 + inode->i_sb->s_blocksize); set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT); } else { /* truncate */ set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1); set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT); } if ((truncate && (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) || (!truncate && (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask))) /* don't take quota bytes from anywhere */ reiserfs_delete_solid_item(&th, NULL, &key); if (!truncate) { reiserfs_release_objectid(&th, inode->i_ino); REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask; } else REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; return journal_end(&th); } static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { reiserfs_proc_info_done(s); /* * Force any pending inode evictions to occur now. Any * inodes to be removed that have extended attributes * associated with them need to clean them up before * we can release the extended attribute root dentries. * shrink_dcache_for_umount will BUG if we don't release * those before it's called so ->put_super is too late. 
*/ shrink_dcache_sb(s); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; dput(REISERFS_SB(s)->priv_root); REISERFS_SB(s)->priv_root = NULL; } kill_block_super(s); } #ifdef CONFIG_QUOTA static int reiserfs_quota_off(struct super_block *sb, int type); static void reiserfs_quota_off_umount(struct super_block *s) { int type; for (type = 0; type < REISERFS_MAXQUOTAS; type++) reiserfs_quota_off(s, type); } #else static inline void reiserfs_quota_off_umount(struct super_block *s) { } #endif static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; reiserfs_quota_off_umount(s); reiserfs_write_lock(s); /* * change file system state to current state if it was mounted * with read-write permissions */ if (!sb_rdonly(s)) { if (!journal_begin(&th, s, 10)) { reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), REISERFS_SB(s)->s_mount_state); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); } } /* * note, journal_release checks for readonly mount, and can * decide not to do a journal_end */ journal_release(&th, s); reiserfs_free_bitmap_cache(s); brelse(SB_BUFFER_WITH_SB(s)); print_statistics(s); if (REISERFS_SB(s)->reserved_blocks != 0) { reiserfs_warning(s, "green-2005", "reserved blocks left %d", REISERFS_SB(s)->reserved_blocks); } reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); destroy_workqueue(REISERFS_SB(s)->commit_wq); kfree(REISERFS_SB(s)->s_jdev); kfree(s->s_fs_info); s->s_fs_info = NULL; } static struct kmem_cache *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; ei = alloc_inode_sb(sb, reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; atomic_set(&ei->openers, 0); mutex_init(&ei->tailpack); #ifdef CONFIG_QUOTA memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif return &ei->vfs_inode; } static void reiserfs_free_inode(struct inode *inode) { kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", sizeof(struct reiserfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD| SLAB_ACCOUNT), init_once); if (reiserfs_inode_cachep == NULL) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
*/ rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } /* we don't mark inodes dirty, we just log them */ static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; int err = 0; if (sb_rdonly(inode->i_sb)) { reiserfs_warning(inode->i_sb, "clm-6006", "writing inode %lu on readonly FS", inode->i_ino); return; } reiserfs_write_lock(inode->i_sb); /* * this is really only used for atime updates, so they don't have * to be included in O_SYNC or fsync */ err = journal_begin(&th, inode->i_sb, 1); if (err) goto out; reiserfs_update_sd(&th, inode); journal_end(&th); out: reiserfs_write_unlock(inode->i_sb); } static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) { struct super_block *s = root->d_sb; struct reiserfs_journal *journal = SB_JOURNAL(s); long opts = REISERFS_SB(s)->s_mount_opt; if (opts & (1 << REISERFS_LARGETAIL)) seq_puts(seq, ",tails=on"); else if (!(opts & (1 << REISERFS_SMALLTAIL))) seq_puts(seq, ",notail"); /* tails=small is default so we don't show it */ if (!(opts & (1 << REISERFS_BARRIER_FLUSH))) seq_puts(seq, ",barrier=none"); /* barrier=flush is default so we don't show it */ if (opts & (1 << REISERFS_ERROR_CONTINUE)) seq_puts(seq, ",errors=continue"); else if (opts & (1 << REISERFS_ERROR_PANIC)) seq_puts(seq, ",errors=panic"); /* errors=ro is default so we don't show it */ if (opts & (1 << REISERFS_DATA_LOG)) seq_puts(seq, ",data=journal"); else if (opts & (1 << REISERFS_DATA_WRITEBACK)) seq_puts(seq, ",data=writeback"); /* data=ordered is default so we don't show it */ if (opts & (1 << REISERFS_ATTRS)) seq_puts(seq, ",attrs"); if (opts & (1 << REISERFS_XATTRS_USER)) seq_puts(seq, ",user_xattr"); if (opts & (1 << REISERFS_EXPOSE_PRIVROOT)) seq_puts(seq, ",expose_privroot"); if (opts & (1 << REISERFS_POSIXACL)) seq_puts(seq, ",acl"); if (REISERFS_SB(s)->s_jdev) seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev); if (journal->j_max_commit_age != journal->j_default_max_commit_age) seq_printf(seq, ",commit=%d", journal->j_max_commit_age); #ifdef CONFIG_QUOTA if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) seq_show_option(seq, "usrjquota", REISERFS_SB(s)->s_qf_names[USRQUOTA]); else if (opts & (1 << REISERFS_USRQUOTA)) seq_puts(seq, ",usrquota"); if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) seq_show_option(seq, "grpjquota", REISERFS_SB(s)->s_qf_names[GRPQUOTA]); else if (opts & (1 << REISERFS_GRPQUOTA)) seq_puts(seq, ",grpquota"); if (REISERFS_SB(s)->s_jquota_fmt) { if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD) seq_puts(seq, ",jqfmt=vfsold"); else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0) seq_puts(seq, ",jqfmt=vfsv0"); } #endif /* Block allocator options */ if (opts & (1 << REISERFS_NO_BORDER)) seq_puts(seq, ",block-allocator=noborder"); if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION)) seq_puts(seq, ",block-allocator=no_unhashed_relocation"); if (opts & (1 << REISERFS_HASHED_RELOCATION)) seq_puts(seq, ",block-allocator=hashed_relocation"); if (opts & (1 << REISERFS_TEST4)) seq_puts(seq, ",block-allocator=test4"); show_alloc_options(seq, s); return 0; } #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); static struct dquot **reiserfs_get_dquots(struct inode *inode) { return REISERFS_I(inode)->i_dquot; } #endif static const struct super_operations reiserfs_sops = { .alloc_inode = reiserfs_alloc_inode, .free_inode = reiserfs_free_inode, .write_inode = 
reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, .evict_inode = reiserfs_evict_inode, .put_super = reiserfs_put_super, .sync_fs = reiserfs_sync_fs, .freeze_fs = reiserfs_freeze, .unfreeze_fs = reiserfs_unfreeze, .statfs = reiserfs_statfs, .remount_fs = reiserfs_remount, .show_options = reiserfs_show_options, #ifdef CONFIG_QUOTA .quota_read = reiserfs_quota_read, .quota_write = reiserfs_quota_write, .get_dquots = reiserfs_get_dquots, #endif }; #ifdef CONFIG_QUOTA #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") static int reiserfs_write_dquot(struct dquot *); static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, const struct path *); static const struct dquot_operations reiserfs_quota_operations = { .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, .mark_dirty = reiserfs_mark_dquot_dirty, .write_info = reiserfs_write_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, .quota_off = reiserfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk, }; #endif static const struct export_operations reiserfs_export_ops = { .encode_fh = reiserfs_encode_fh, .fh_to_dentry = reiserfs_fh_to_dentry, .fh_to_parent = reiserfs_fh_to_parent, .get_parent = reiserfs_get_parent, }; /* * this struct is used in reiserfs_getopt () for containing the value for * those mount options that have values rather than being toggles. */ typedef struct { char *value; /* * bitmask which is to set on mount_options bitmask * when this value is found, 0 is no bits are to be changed. */ int setmask; /* * bitmask which is to clear on mount_options bitmask * when this value is found, 0 is no bits are to be changed. * This is applied BEFORE setmask */ int clrmask; } arg_desc_t; /* Set this bit in arg_required to allow empty arguments */ #define REISERFS_OPT_ALLOWEMPTY 31 /* * this struct is used in reiserfs_getopt() for describing the * set of reiserfs mount options */ typedef struct { char *option_name; /* 0 if argument is not required, not 0 otherwise */ int arg_required; /* list of values accepted by an option */ const arg_desc_t *values; /* * bitmask which is to set on mount_options bitmask * when this value is found, 0 is no bits are to be changed. */ int setmask; /* * bitmask which is to clear on mount_options bitmask * when this value is found, 0 is no bits are to be changed. 
* This is applied BEFORE setmask */ int clrmask; } opt_desc_t; /* possible values for -o data= */ static const arg_desc_t logging_mode[] = { {"ordered", 1 << REISERFS_DATA_ORDERED, (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)}, {"journal", 1 << REISERFS_DATA_LOG, (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)}, {"writeback", 1 << REISERFS_DATA_WRITEBACK, (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)}, {.value = NULL} }; /* possible values for -o barrier= */ static const arg_desc_t barrier_mode[] = { {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH}, {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE}, {.value = NULL} }; /* * possible values for "-o block-allocator=" and bits which are to be set in * s_mount_opt of reiserfs specific part of in-core super block */ static const arg_desc_t balloc[] = { {"noborder", 1 << REISERFS_NO_BORDER, 0}, {"border", 0, 1 << REISERFS_NO_BORDER}, {"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0}, {"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0}, {"test4", 1 << REISERFS_TEST4, 0}, {"notest4", 0, 1 << REISERFS_TEST4}, {NULL, 0, 0} }; static const arg_desc_t tails[] = { {"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL}, {"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, {"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL}, {NULL, 0, 0} }; static const arg_desc_t error_actions[] = { {"panic", 1 << REISERFS_ERROR_PANIC, (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)}, {"ro-remount", 1 << REISERFS_ERROR_RO, (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)}, #ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG {"continue", 1 << REISERFS_ERROR_CONTINUE, (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)}, #endif {NULL, 0, 0}, }; /* * proceed only one option from a list *cur - string containing of mount * options * opts - array of options which are accepted * opt_arg - if option is found and requires an argument and if it is specifed * in the input - pointer to the argument is stored here * bit_flags - if option requires to set a certain bit - it is set here * return -1 if unknown option is found, opt->arg_required otherwise */ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, char **opt_arg, unsigned long *bit_flags) { char *p; /* * foo=bar, * ^ ^ ^ * | | +-- option_end * | +-- arg_start * +-- option_start */ const opt_desc_t *opt; const arg_desc_t *arg; p = *cur; /* assume argument cannot contain commas */ *cur = strchr(p, ','); if (*cur) { *(*cur) = '\0'; (*cur)++; } if (!strncmp(p, "alloc=", 6)) { /* * Ugly special case, probably we should redo options * parser so that it can understand several arguments for * some options, also so that it can fill several bitfields * with option values. 
*/ if (reiserfs_parse_alloc_options(s, p + 6)) { return -1; } else { return 0; } } /* for every option in the list */ for (opt = opts; opt->option_name; opt++) { if (!strncmp(p, opt->option_name, strlen(opt->option_name))) { if (bit_flags) { if (opt->clrmask == (1 << REISERFS_UNSUPPORTED_OPT)) reiserfs_warning(s, "super-6500", "%s not supported.\n", p); else *bit_flags &= ~opt->clrmask; if (opt->setmask == (1 << REISERFS_UNSUPPORTED_OPT)) reiserfs_warning(s, "super-6501", "%s not supported.\n", p); else *bit_flags |= opt->setmask; } break; } } if (!opt->option_name) { reiserfs_warning(s, "super-6502", "unknown mount option \"%s\"", p); return -1; } p += strlen(opt->option_name); switch (*p) { case '=': if (!opt->arg_required) { reiserfs_warning(s, "super-6503", "the option \"%s\" does not " "require an argument\n", opt->option_name); return -1; } break; case 0: if (opt->arg_required) { reiserfs_warning(s, "super-6504", "the option \"%s\" requires an " "argument\n", opt->option_name); return -1; } break; default: reiserfs_warning(s, "super-6505", "head of option \"%s\" is only correct\n", opt->option_name); return -1; } /* * move to the argument, or to next option if argument is not * required */ p++; if (opt->arg_required && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) && !strlen(p)) { /* this catches "option=," if not allowed */ reiserfs_warning(s, "super-6506", "empty argument for \"%s\"\n", opt->option_name); return -1; } if (!opt->values) { /* *=NULLopt_arg contains pointer to argument */ *opt_arg = p; return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY); } /* values possible for this option are listed in opt->values */ for (arg = opt->values; arg->value; arg++) { if (!strcmp(p, arg->value)) { if (bit_flags) { *bit_flags &= ~arg->clrmask; *bit_flags |= arg->setmask; } return opt->arg_required; } } reiserfs_warning(s, "super-6506", "bad value \"%s\" for option \"%s\"\n", p, opt->option_name); return -1; } /* returns 0 if something is wrong in option string, 1 - otherwise */ static int reiserfs_parse_options(struct super_block *s, /* string given via mount's -o */ char *options, /* * after the parsing phase, contains the * collection of bitflags defining what * mount options were selected. 
*/ unsigned long *mount_options, /* strtol-ed from NNN of resize=NNN */ unsigned long *blocks, char **jdev_name, unsigned int *commit_max_age, char **qf_names, unsigned int *qfmt) { int c; char *arg = NULL; char *pos; opt_desc_t opts[] = { /* * Compatibility stuff, so that -o notail for old * setups still work */ {"tails",.arg_required = 't',.values = tails}, {"notail",.clrmask = (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, {"conv",.setmask = 1 << REISERFS_CONVERT}, {"attrs",.setmask = 1 << REISERFS_ATTRS}, {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, #ifdef CONFIG_REISERFS_FS_XATTR {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, #else {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif #ifdef CONFIG_REISERFS_FS_POSIX_ACL {"acl",.setmask = 1 << REISERFS_POSIXACL}, {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, #else {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif {.option_name = "nolog"}, {"replayonly",.setmask = 1 << REPLAYONLY}, {"block-allocator",.arg_required = 'a',.values = balloc}, {"data",.arg_required = 'd',.values = logging_mode}, {"barrier",.arg_required = 'b',.values = barrier_mode}, {"resize",.arg_required = 'r',.values = NULL}, {"jdev",.arg_required = 'j',.values = NULL}, {"nolargeio",.arg_required = 'w',.values = NULL}, {"commit",.arg_required = 'c',.values = NULL}, {"usrquota",.setmask = 1 << REISERFS_USRQUOTA}, {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA}, {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA}, {"errors",.arg_required = 'e',.values = error_actions}, {"usrjquota",.arg_required = 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, {"grpjquota",.arg_required = 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, {"jqfmt",.arg_required = 'f',.values = NULL}, {.option_name = NULL} }; *blocks = 0; if (!options || !*options) /* * use default configuration: create tails, journaling on, no * conversion to newest format */ return 1; for (pos = options; pos;) { c = reiserfs_getopt(s, &pos, opts, &arg, mount_options); if (c == -1) /* wrong option is given */ return 0; if (c == 'r') { char *p; p = NULL; /* "resize=NNN" or "resize=auto" */ if (!strcmp(arg, "auto")) { /* From JFS code, to auto-get the size. */ *blocks = sb_bdev_nr_blocks(s); } else { *blocks = simple_strtoul(arg, &p, 0); if (*p != '\0') { /* NNN does not look like a number */ reiserfs_warning(s, "super-6507", "bad value %s for " "-oresize\n", arg); return 0; } } } if (c == 'c') { char *p = NULL; unsigned long val = simple_strtoul(arg, &p, 0); /* commit=NNN (time in seconds) */ if (*p != '\0' || val >= (unsigned int)-1) { reiserfs_warning(s, "super-6508", "bad value %s for -ocommit\n", arg); return 0; } *commit_max_age = (unsigned int)val; } if (c == 'w') { reiserfs_warning(s, "super-6509", "nolargeio option " "is no longer supported"); return 0; } if (c == 'j') { if (arg && *arg && jdev_name) { /* Hm, already assigned? */ if (*jdev_name) { reiserfs_warning(s, "super-6510", "journal device was " "already specified to " "be %s", *jdev_name); return 0; } *jdev_name = arg; } } #ifdef CONFIG_QUOTA if (c == 'u' || c == 'g') { int qtype = c == 'u' ? 
USRQUOTA : GRPQUOTA; if (sb_any_quota_loaded(s) && (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { reiserfs_warning(s, "super-6511", "cannot change journaled " "quota options when quota " "turned on."); return 0; } if (qf_names[qtype] != REISERFS_SB(s)->s_qf_names[qtype]) kfree(qf_names[qtype]); qf_names[qtype] = NULL; if (*arg) { /* Some filename specified? */ if (REISERFS_SB(s)->s_qf_names[qtype] && strcmp(REISERFS_SB(s)->s_qf_names[qtype], arg)) { reiserfs_warning(s, "super-6512", "%s quota file " "already specified.", QTYPE2NAME(qtype)); return 0; } if (strchr(arg, '/')) { reiserfs_warning(s, "super-6513", "quotafile must be " "on filesystem root."); return 0; } qf_names[qtype] = kstrdup(arg, GFP_KERNEL); if (!qf_names[qtype]) { reiserfs_warning(s, "reiserfs-2502", "not enough memory " "for storing " "quotafile name."); return 0; } if (qtype == USRQUOTA) *mount_options |= 1 << REISERFS_USRQUOTA; else *mount_options |= 1 << REISERFS_GRPQUOTA; } else { if (qtype == USRQUOTA) *mount_options &= ~(1 << REISERFS_USRQUOTA); else *mount_options &= ~(1 << REISERFS_GRPQUOTA); } } if (c == 'f') { if (!strcmp(arg, "vfsold")) *qfmt = QFMT_VFS_OLD; else if (!strcmp(arg, "vfsv0")) *qfmt = QFMT_VFS_V0; else { reiserfs_warning(s, "super-6514", "unknown quota format " "specified."); return 0; } if (sb_any_quota_loaded(s) && *qfmt != REISERFS_SB(s)->s_jquota_fmt) { reiserfs_warning(s, "super-6515", "cannot change journaled " "quota options when quota " "turned on."); return 0; } } #else if (c == 'u' || c == 'g' || c == 'f') { reiserfs_warning(s, "reiserfs-2503", "journaled " "quota options not supported."); return 0; } #endif } #ifdef CONFIG_QUOTA if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { reiserfs_warning(s, "super-6515", "journaled quota format not specified."); return 0; } if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) && sb_has_quota_loaded(s, USRQUOTA)) || (!(*mount_options & (1 << REISERFS_GRPQUOTA)) && sb_has_quota_loaded(s, GRPQUOTA))) { reiserfs_warning(s, "super-6516", "quota options must " "be present when quota is turned on."); return 0; } #endif return 1; } static void switch_data_mode(struct super_block *s, unsigned long mode) { REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | (1 << REISERFS_DATA_ORDERED) | (1 << REISERFS_DATA_WRITEBACK)); REISERFS_SB(s)->s_mount_opt |= (1 << mode); } static void handle_data_mode(struct super_block *s, unsigned long mount_options) { if (mount_options & (1 << REISERFS_DATA_LOG)) { if (!reiserfs_data_log(s)) { switch_data_mode(s, REISERFS_DATA_LOG); reiserfs_info(s, "switching to journaled data mode\n"); } } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { if (!reiserfs_data_ordered(s)) { switch_data_mode(s, REISERFS_DATA_ORDERED); reiserfs_info(s, "switching to ordered data mode\n"); } } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { if (!reiserfs_data_writeback(s)) { switch_data_mode(s, REISERFS_DATA_WRITEBACK); reiserfs_info(s, "switching to writeback data mode\n"); } } } static void handle_barrier_mode(struct super_block *s, unsigned long bits) { int flush = (1 << REISERFS_BARRIER_FLUSH); int none = (1 << REISERFS_BARRIER_NONE); int all_barrier = flush | none; if (bits & all_barrier) { REISERFS_SB(s)->s_mount_opt &= ~all_barrier; if (bits & flush) { REISERFS_SB(s)->s_mount_opt |= flush; printk("reiserfs: enabling write barrier flush mode\n"); } else if (bits & none) { REISERFS_SB(s)->s_mount_opt |= none; printk("reiserfs: write barriers turned off\n"); } } } static void 
handle_attrs(struct super_block *s) { struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); if (reiserfs_attrs(s)) { if (old_format_only(s)) { reiserfs_warning(s, "super-6517", "cannot support " "attributes on 3.5.x disk format"); REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); return; } if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { reiserfs_warning(s, "super-6518", "cannot support " "attributes until flag is set in " "super-block"); REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); } } } #ifdef CONFIG_QUOTA static void handle_quota_files(struct super_block *s, char **qf_names, unsigned int *qfmt) { int i; for (i = 0; i < REISERFS_MAXQUOTAS; i++) { if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) kfree(REISERFS_SB(s)->s_qf_names[i]); REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; } if (*qfmt) REISERFS_SB(s)->s_jquota_fmt = *qfmt; } #endif static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) { struct reiserfs_super_block *rs; struct reiserfs_transaction_handle th; unsigned long blocks; unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; #ifdef CONFIG_QUOTA int i; #endif sync_filesystem(s); reiserfs_write_lock(s); #ifdef CONFIG_QUOTA memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options (s, arg, &mount_options, &blocks, NULL, &commit_max_age, qf_names, &qfmt)) { #ifdef CONFIG_QUOTA for (i = 0; i < REISERFS_MAXQUOTAS; i++) if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) kfree(qf_names[i]); #endif err = -EINVAL; goto out_err_unlock; } #ifdef CONFIG_QUOTA handle_quota_files(s, qf_names, &qfmt); #endif handle_attrs(s); /* Add options that are safe here */ safe_mask |= 1 << REISERFS_SMALLTAIL; safe_mask |= 1 << REISERFS_LARGETAIL; safe_mask |= 1 << REISERFS_NO_BORDER; safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; safe_mask |= 1 << REISERFS_HASHED_RELOCATION; safe_mask |= 1 << REISERFS_TEST4; safe_mask |= 1 << REISERFS_ATTRS; safe_mask |= 1 << REISERFS_XATTRS_USER; safe_mask |= 1 << REISERFS_POSIXACL; safe_mask |= 1 << REISERFS_BARRIER_FLUSH; safe_mask |= 1 << REISERFS_BARRIER_NONE; safe_mask |= 1 << REISERFS_ERROR_RO; safe_mask |= 1 << REISERFS_ERROR_CONTINUE; safe_mask |= 1 << REISERFS_ERROR_PANIC; safe_mask |= 1 << REISERFS_USRQUOTA; safe_mask |= 1 << REISERFS_GRPQUOTA; /* * Update the bitmask, taking care to keep * the bits we're not allowed to change here */ REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)-> s_mount_opt & ~safe_mask) | (mount_options & safe_mask); if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) { journal->j_max_commit_age = commit_max_age; journal->j_max_trans_age = commit_max_age; } else if (commit_max_age == 0) { /* 0 means restore defaults. 
*/ journal->j_max_commit_age = journal->j_default_max_commit_age; journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; } if (blocks) { err = reiserfs_resize(s, blocks); if (err != 0) goto out_err_unlock; } if (*mount_flags & SB_RDONLY) { reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); /* remount read-only */ if (sb_rdonly(s)) /* it is read-only already */ goto out_ok_unlocked; err = dquot_suspend(s, -1); if (err < 0) goto out_err; /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { goto out_ok_unlocked; } reiserfs_write_lock(s); err = journal_begin(&th, s, 10); if (err) goto out_err_unlock; /* Mounting a rw partition read-only. */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); } else { /* remount read-write */ if (!sb_rdonly(s)) { reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); goto out_ok_unlocked; /* We are read-write already */ } if (reiserfs_is_journal_aborted(journal)) { err = journal->j_errno; goto out_err_unlock; } handle_data_mode(s, mount_options); handle_barrier_mode(s, mount_options); REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); /* now it is safe to call journal_begin */ s->s_flags &= ~SB_RDONLY; err = journal_begin(&th, s, 10); if (err) goto out_err_unlock; /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); s->s_flags &= ~SB_RDONLY; set_sb_umount_state(rs, REISERFS_ERROR_FS); if (!old_format_only(s)) set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; } /* this will force a full flush of all journal lists */ SB_JOURNAL(s)->j_must_wait = 1; err = journal_end(&th); if (err) goto out_err_unlock; reiserfs_write_unlock(s); if (!(*mount_flags & SB_RDONLY)) { dquot_resume(s, -1); reiserfs_write_lock(s); finish_unfinished(s); reiserfs_write_unlock(s); reiserfs_xattr_init(s, *mount_flags); } out_ok_unlocked: return 0; out_err_unlock: reiserfs_write_unlock(s); out_err: return err; } static int read_super_block(struct super_block *s, int offset) { struct buffer_head *bh; struct reiserfs_super_block *rs; int fs_blocksize; bh = sb_bread(s, offset / s->s_blocksize); if (!bh) { reiserfs_warning(s, "sh-2006", "bread failed (dev %s, block %lu, size %lu)", s->s_id, offset / s->s_blocksize, s->s_blocksize); return 1; } rs = (struct reiserfs_super_block *)bh->b_data; if (!is_any_reiserfs_magic_string(rs)) { brelse(bh); return 1; } /* * ok, reiserfs signature (old or new) found in at the given offset */ fs_blocksize = sb_blocksize(rs); brelse(bh); sb_set_blocksize(s, fs_blocksize); bh = sb_bread(s, offset / s->s_blocksize); if (!bh) { reiserfs_warning(s, "sh-2007", "bread failed (dev %s, block %lu, size %lu)", s->s_id, offset / s->s_blocksize, s->s_blocksize); return 1; } rs = (struct reiserfs_super_block *)bh->b_data; if (sb_blocksize(rs) != s->s_blocksize) { reiserfs_warning(s, "sh-2011", "can't find a reiserfs " "filesystem on (dev %s, block %llu, size %lu)", s->s_id, (unsigned long long)bh->b_blocknr, s->s_blocksize); brelse(bh); return 1; } if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { brelse(bh); reiserfs_warning(s, "super-6519", "Unfinished reiserfsck " "--rebuild-tree run 
detected. Please run\n" "reiserfsck --rebuild-tree and wait for a " "completion. If that fails\n" "get newer reiserfsprogs package"); return 1; } reiserfs_warning(NULL, "", "reiserfs filesystem is deprecated and " "scheduled to be removed from the kernel in 2025"); SB_BUFFER_WITH_SB(s) = bh; SB_DISK_SUPER_BLOCK(s) = rs; /* * magic is of non-standard journal filesystem, look at s_version to * find which format is in use */ if (is_reiserfs_jr(rs)) { if (sb_version(rs) == REISERFS_VERSION_2) reiserfs_info(s, "found reiserfs format \"3.6\"" " with non-standard journal\n"); else if (sb_version(rs) == REISERFS_VERSION_1) reiserfs_info(s, "found reiserfs format \"3.5\"" " with non-standard journal\n"); else { reiserfs_warning(s, "sh-2012", "found unknown " "format \"%u\" of reiserfs with " "non-standard magic", sb_version(rs)); return 1; } } else /* * s_version of standard format may contain incorrect * information, so we just look at the magic string */ reiserfs_info(s, "found reiserfs format \"%s\" with standard journal\n", is_reiserfs_3_5(rs) ? "3.5" : "3.6"); s->s_op = &reiserfs_sops; s->s_export_op = &reiserfs_export_ops; #ifdef CONFIG_QUOTA s->s_qcop = &reiserfs_qctl_operations; s->dq_op = &reiserfs_quota_operations; s->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* * new format is limited by the 32 bit wide i_blocks field, want to * be one full block below that. */ s->s_maxbytes = (512LL << 32) - s->s_blocksize; return 0; } /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { if (bh_read(SB_BUFFER_WITH_SB(s), 0) < 0) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); return 1; } return 0; } /* hash detection stuff */ /* * if root directory is empty - we set default - Yura's - hash and * warn about it * FIXME: we look for only one name in a directory. If tea and yura * both have the same value - we ask user to send report to the * mailing list */ static __u32 find_hash_out(struct super_block *s) { int retval; struct inode *inode; struct cpu_key key; INITIALIZE_PATH(path); struct reiserfs_dir_entry de; struct reiserfs_de_head *deh; __u32 hash = DEFAULT_HASH; __u32 deh_hashval, teahash, r5hash, yurahash; inode = d_inode(s->s_root); make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); retval = search_by_entry_key(s, &key, &path, &de); if (retval == IO_ERROR) { pathrelse(&path); return UNSET_HASH; } if (retval == NAME_NOT_FOUND) de.de_entry_num--; set_de_name_and_namelen(&de); deh = de.de_deh + de.de_entry_num; if (deh_offset(deh) == DOT_DOT_OFFSET) { /* allow override in this case */ if (reiserfs_rupasov_hash(s)) hash = YURA_HASH; reiserfs_info(s, "FS seems to be empty, autodetect is using the default hash\n"); goto out; } deh_hashval = GET_HASH_VALUE(deh_offset(deh)); r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); if ((teahash == r5hash && deh_hashval == r5hash) || (teahash == yurahash && deh_hashval == yurahash) || (r5hash == yurahash && deh_hashval == yurahash)) { reiserfs_warning(s, "reiserfs-2506", "Unable to automatically detect hash " "function. 
Please mount with -o " "hash={tea,rupasov,r5}"); hash = UNSET_HASH; goto out; } if (deh_hashval == yurahash) hash = YURA_HASH; else if (deh_hashval == teahash) hash = TEA_HASH; else if (deh_hashval == r5hash) hash = R5_HASH; else { reiserfs_warning(s, "reiserfs-2506", "Unrecognised hash function"); hash = UNSET_HASH; } out: pathrelse(&path); return hash; } /* finds out which hash names are sorted with */ static int what_hash(struct super_block *s) { __u32 code; code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); /* * reiserfs_hash_detect() == true if any of the hash mount options * were used. We must check them to make sure the user isn't * using a bad hash value */ if (code == UNSET_HASH || reiserfs_hash_detect(s)) code = find_hash_out(s); if (code != UNSET_HASH && reiserfs_hash_detect(s)) { /* * detection has found the hash, and we must check against the * mount options */ if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { reiserfs_warning(s, "reiserfs-2507", "Error, %s hash detected, " "unable to force rupasov hash", reiserfs_hashname(code)); code = UNSET_HASH; } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { reiserfs_warning(s, "reiserfs-2508", "Error, %s hash detected, " "unable to force tea hash", reiserfs_hashname(code)); code = UNSET_HASH; } else if (reiserfs_r5_hash(s) && code != R5_HASH) { reiserfs_warning(s, "reiserfs-2509", "Error, %s hash detected, " "unable to force r5 hash", reiserfs_hashname(code)); code = UNSET_HASH; } } else { /* * find_hash_out was not called or * could not determine the hash */ if (reiserfs_rupasov_hash(s)) { code = YURA_HASH; } else if (reiserfs_tea_hash(s)) { code = TEA_HASH; } else if (reiserfs_r5_hash(s)) { code = R5_HASH; } } /* * if we are mounted RW, and we have a new valid hash code, update * the super */ if (code != UNSET_HASH && !sb_rdonly(s) && code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); } return code; } /* return pointer to appropriate function */ static hashf_t hash_function(struct super_block *s) { switch (what_hash(s)) { case TEA_HASH: reiserfs_info(s, "Using tea hash to sort names\n"); return keyed_hash; case YURA_HASH: reiserfs_info(s, "Using rupasov hash to sort names\n"); return yura_hash; case R5_HASH: reiserfs_info(s, "Using r5 hash to sort names\n"); return r5_hash; } return NULL; } /* this is used to set up correct value for old partitions */ static int function2code(hashf_t func) { if (func == keyed_hash) return TEA_HASH; if (func == yura_hash) return YURA_HASH; if (func == r5_hash) return R5_HASH; BUG(); /* should never happen */ return 0; } #define SWARN(silent, s, id, ...) 
\ if (!(silent)) \ reiserfs_warning(s, id, __VA_ARGS__) static int reiserfs_fill_super(struct super_block *s, void *data, int silent) { struct inode *root_inode; struct reiserfs_transaction_handle th; int old_format = 0; unsigned long blocks; unsigned int commit_max_age = 0; int jinit_done = 0; struct reiserfs_iget_args args; struct reiserfs_super_block *rs; char *jdev_name; struct reiserfs_sb_info *sbi; int errval = -EINVAL; char *qf_names[REISERFS_MAXQUOTAS] = {}; unsigned int qfmt = 0; sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); /* no preallocation minimum, be smart in reiserfs_file_write instead */ sbi->s_alloc_options.preallocmin = 0; /* Preallocate by 16 blocks (17-1) at once */ sbi->s_alloc_options.preallocsize = 17; /* setup default block allocator options */ reiserfs_init_alloc_options(s); spin_lock_init(&sbi->old_work_lock); INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); mutex_init(&sbi->lock); sbi->lock_depth = -1; sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0, s->s_id); if (!sbi->commit_wq) { SWARN(silent, s, "", "Cannot allocate commit workqueue"); errval = -ENOMEM; goto error_unlocked; } jdev_name = NULL; if (reiserfs_parse_options (s, (char *)data, &sbi->s_mount_opt, &blocks, &jdev_name, &commit_max_age, qf_names, &qfmt) == 0) { goto error_unlocked; } if (jdev_name && jdev_name[0]) { sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); goto error_unlocked; } } #ifdef CONFIG_QUOTA handle_quota_files(s, qf_names, &qfmt); #endif if (blocks) { SWARN(silent, s, "jmacd-7", "resize option for remount only"); goto error_unlocked; } /* * try old format (undistributed bitmap, super block in 8-th 1k * block of a device) */ if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) old_format = 1; /* * try new format (64-th 1k block), which can contain reiserfs * super block */ else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", s->s_id); goto error_unlocked; } s->s_time_min = 0; s->s_time_max = U32_MAX; rs = SB_DISK_SUPER_BLOCK(s); /* * Let's do basic sanity check to verify that underlying device is not * smaller than the filesystem. If the check fails then abort and * scream, because bad stuff will happen otherwise. 
*/ if (bdev_nr_bytes(s->s_bdev) < sb_block_count(rs) * sb_blocksize(rs)) { SWARN(silent, s, "", "Filesystem cannot be " "mounted because it is bigger than the device"); SWARN(silent, s, "", "You may need to run fsck " "or increase size of your LVM partition"); SWARN(silent, s, "", "Or may be you forgot to " "reboot after fdisk when it told you to"); goto error_unlocked; } sbi->s_mount_state = SB_REISERFS_STATE(s); sbi->s_mount_state = REISERFS_VALID_FS; if ((errval = reiserfs_init_bitmap_cache(s))) { SWARN(silent, s, "jmacd-8", "unable to read bitmap"); goto error_unlocked; } errval = -EINVAL; #ifdef CONFIG_REISERFS_CHECK SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); SWARN(silent, s, "", "- it is slow mode for debugging."); #endif /* make data=ordered the default */ if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && !reiserfs_data_writeback(s)) { sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); } if (reiserfs_data_log(s)) { reiserfs_info(s, "using journaled data mode\n"); } else if (reiserfs_data_ordered(s)) { reiserfs_info(s, "using ordered data mode\n"); } else { reiserfs_info(s, "using writeback data mode\n"); } if (reiserfs_barrier_flush(s)) { printk("reiserfs: using flush barriers\n"); } if (journal_init(s, jdev_name, old_format, commit_max_age)) { SWARN(silent, s, "sh-2022", "unable to initialize journal space"); goto error_unlocked; } else { /* * once this is set, journal_release must be called * if we error out of the mount */ jinit_done = 1; } if (reread_meta_blocks(s)) { SWARN(silent, s, "jmacd-9", "unable to reread meta blocks after journal init"); goto error_unlocked; } if (replay_only(s)) goto error_unlocked; s->s_xattr = reiserfs_xattr_handlers; if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); s->s_flags |= SB_RDONLY; } args.objectid = REISERFS_ROOT_OBJECTID; args.dirid = REISERFS_ROOT_PARENT_OBJECTID; root_inode = iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)&args); if (!root_inode) { SWARN(silent, s, "jmacd-10", "get root inode failed"); goto error_unlocked; } /* * This path assumed to be called with the BKL in the old times. * Now we have inherited the big reiserfs lock from it and many * reiserfs helpers called in the mount path and elsewhere require * this lock to be held even if it's not always necessary. Let's be * conservative and hold it early. The window can be reduced after * careful review of the code. 
*/ reiserfs_write_lock(s); if (root_inode->i_state & I_NEW) { reiserfs_read_locked_inode(root_inode, &args); unlock_new_inode(root_inode); } if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || !root_inode->i_size) { SWARN(silent, s, "", "corrupt root inode, run fsck"); iput(root_inode); errval = -EUCLEAN; goto error; } s->s_root = d_make_root(root_inode); if (!s->s_root) goto error; /* define and initialize hash function */ sbi->s_hash_function = hash_function(s); if (sbi->s_hash_function == NULL) { dput(s->s_root); s->s_root = NULL; goto error; } if (is_reiserfs_3_5(rs) || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) set_bit(REISERFS_3_5, &sbi->s_properties); else if (old_format) set_bit(REISERFS_OLD_FORMAT, &sbi->s_properties); else set_bit(REISERFS_3_6, &sbi->s_properties); if (!sb_rdonly(s)) { errval = journal_begin(&th, s, 1); if (errval) { dput(s->s_root); s->s_root = NULL; goto error; } reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); set_sb_umount_state(rs, REISERFS_ERROR_FS); set_sb_fs_state(rs, 0); /* * Clear out s_bmap_nr if it would wrap. We can handle this * case, but older revisions can't. This will cause the * file system to fail mount on those older implementations, * avoiding corruption. -jeffm */ if (bmap_would_wrap(reiserfs_bmap_count(s)) && sb_bmap_nr(rs) != 0) { reiserfs_warning(s, "super-2030", "This file system " "claims to use %u bitmap blocks in " "its super block, but requires %u. " "Clearing to zero.", sb_bmap_nr(rs), reiserfs_bmap_count(s)); set_sb_bmap_nr(rs, 0); } if (old_format_only(s)) { /* * filesystem of format 3.5 either with standard * or non-standard journal */ if (convert_reiserfs(s)) { /* and -o conv is given */ if (!silent) reiserfs_info(s, "converting 3.5 filesystem to the 3.6 format"); if (is_reiserfs_3_5(rs)) /* * put magic string of 3.6 format. * 2.2 will not be able to * mount this filesystem anymore */ memcpy(rs->s_v1.s_magic, reiserfs_3_6_magic_string, sizeof (reiserfs_3_6_magic_string)); set_sb_version(rs, REISERFS_VERSION_2); reiserfs_convert_objectid_map_v1(s); set_bit(REISERFS_3_6, &sbi->s_properties); clear_bit(REISERFS_3_5, &sbi->s_properties); } else if (!silent) { reiserfs_info(s, "using 3.5.x disk format\n"); } } else set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); errval = journal_end(&th); if (errval) { dput(s->s_root); s->s_root = NULL; goto error; } reiserfs_write_unlock(s); if ((errval = reiserfs_lookup_privroot(s)) || (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error_unlocked; } reiserfs_write_lock(s); /* * look for files which were to be removed in previous session */ finish_unfinished(s); } else { if (old_format_only(s) && !silent) { reiserfs_info(s, "using 3.5.x disk format\n"); } reiserfs_write_unlock(s); if ((errval = reiserfs_lookup_privroot(s)) || (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error_unlocked; } reiserfs_write_lock(s); } /* * mark hash in super block: it could be unset. 
overwrite should be ok */ set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); handle_attrs(s); reiserfs_proc_info_init(s); init_waitqueue_head(&(sbi->s_wait)); spin_lock_init(&sbi->bitmap_lock); reiserfs_write_unlock(s); return (0); error: reiserfs_write_unlock(s); error_unlocked: /* kill the commit thread, free journal ram */ if (jinit_done) { reiserfs_write_lock(s); journal_release_error(NULL, s); reiserfs_write_unlock(s); } if (sbi->commit_wq) destroy_workqueue(sbi->commit_wq); reiserfs_cancel_old_flush(s); reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); #ifdef CONFIG_QUOTA { int j; for (j = 0; j < REISERFS_MAXQUOTAS; j++) kfree(qf_names[j]); } #endif kfree(sbi->s_jdev); kfree(sbi); s->s_fs_info = NULL; return errval; } static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb); buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); buf->f_bfree = sb_free_blocks(rs); buf->f_bavail = buf->f_bfree; buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; buf->f_bsize = dentry->d_sb->s_blocksize; /* changed to accommodate gcc folks. */ buf->f_type = REISERFS_SUPER_MAGIC; buf->f_fsid.val[0] = (u32)crc32_le(0, rs->s_uuid, sizeof(rs->s_uuid)/2); buf->f_fsid.val[1] = (u32)crc32_le(0, rs->s_uuid + sizeof(rs->s_uuid)/2, sizeof(rs->s_uuid)/2); return 0; } #ifdef CONFIG_QUOTA static int reiserfs_write_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; int ret, err; int depth; reiserfs_write_lock(dquot->dq_sb); ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (ret) goto out; depth = reiserfs_write_unlock_nested(dquot->dq_sb); ret = dquot_commit(dquot); reiserfs_write_lock_nested(dquot->dq_sb, depth); err = journal_end(&th); if (!ret && err) ret = err; out: reiserfs_write_unlock(dquot->dq_sb); return ret; } static int reiserfs_acquire_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; int ret, err; int depth; reiserfs_write_lock(dquot->dq_sb); ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (ret) goto out; depth = reiserfs_write_unlock_nested(dquot->dq_sb); ret = dquot_acquire(dquot); reiserfs_write_lock_nested(dquot->dq_sb, depth); err = journal_end(&th); if (!ret && err) ret = err; out: reiserfs_write_unlock(dquot->dq_sb); return ret; } static int reiserfs_release_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; int ret, err; reiserfs_write_lock(dquot->dq_sb); ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); reiserfs_write_unlock(dquot->dq_sb); if (ret) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); goto out; } ret = dquot_release(dquot); reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th); if (!ret && err) ret = err; reiserfs_write_unlock(dquot->dq_sb); out: return ret; } static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { /* Are we journaling quotas? 
*/ if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return reiserfs_write_dquot(dquot); } else return dquot_mark_dquot_dirty(dquot); } static int reiserfs_write_info(struct super_block *sb, int type) { struct reiserfs_transaction_handle th; int ret, err; int depth; /* Data block + inode block */ reiserfs_write_lock(sb); ret = journal_begin(&th, sb, 2); if (ret) goto out; depth = reiserfs_write_unlock_nested(sb); ret = dquot_commit_info(sb, type); reiserfs_write_lock_nested(sb, depth); err = journal_end(&th); if (!ret && err) ret = err; out: reiserfs_write_unlock(sb); return ret; } /* * Turn on quotas during mount time - we need to find the quota file and such... */ static int reiserfs_quota_on_mount(struct super_block *sb, int type) { return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], REISERFS_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int err; struct inode *inode; struct reiserfs_transaction_handle th; int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; reiserfs_write_lock(sb); if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) { err = -EINVAL; goto out; } /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) { err = -EXDEV; goto out; } inode = d_inode(path->dentry); /* * We must not pack tails for quota files on reiserfs for quota * IO to work */ if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { err = reiserfs_unpack(inode); if (err) { reiserfs_warning(sb, "super-6520", "Unpacking tail of quota file failed" " (%d). Cannot turn on quotas.", err); err = -EINVAL; goto out; } mark_inode_dirty(inode); } /* Journaling quota? */ if (REISERFS_SB(sb)->s_qf_names[type]) { /* Quotafile not of fs root? */ if (path->dentry->d_parent != sb->s_root) reiserfs_warning(sb, "super-6521", "Quota file not on filesystem root. " "Journalled quota will not work."); } /* * When we journal data on quota file, we have to flush journal to see * all updates to the file when we bypass pagecache... */ if (reiserfs_file_data_log(inode)) { /* Just start temporary transaction and finish it */ err = journal_begin(&th, sb, 1); if (err) goto out; err = journal_end_sync(&th); if (err) goto out; } reiserfs_write_unlock(sb); err = dquot_quota_on(sb, type, format_id, path); if (!err) { inode_lock(inode); REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL | REISERFS_NOATIME_FL; inode_set_flags(inode, S_IMMUTABLE | S_NOATIME, S_IMMUTABLE | S_NOATIME); inode_unlock(inode); mark_inode_dirty(inode); } return err; out: reiserfs_write_unlock(sb); return err; } static int reiserfs_quota_off(struct super_block *sb, int type) { int err; struct inode *inode = sb_dqopt(sb)->files[type]; if (!inode || !igrab(inode)) goto out; err = dquot_quota_off(sb, type); if (err) goto out_put; inode_lock(inode); REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL | REISERFS_NOATIME_FL); inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME); inode_unlock(inode); mark_inode_dirty(inode); out_put: iput(inode); return err; out: return dquot_quota_off(sb, type); } /* * Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; size_t toread; struct buffer_head tmp_bh, *bh; loff_t i_size = i_size_read(inode); if (off > i_size) return 0; if (off + len > i_size) len = i_size - off; toread = len; while (toread > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); tmp_bh.b_state = 0; /* * Quota files are without tails so we can safely * use this function */ reiserfs_write_lock(sb); err = reiserfs_get_block(inode, blk, &tmp_bh, 0); reiserfs_write_unlock(sb); if (err) return err; if (!buffer_mapped(&tmp_bh)) /* A hole? */ memset(data, 0, tocopy); else { bh = sb_bread(sb, tmp_bh.b_blocknr); if (!bh) return -EIO; memcpy(data, bh->b_data + offset, tocopy); brelse(bh); } offset = 0; toread -= tocopy; data += tocopy; blk++; } return len; } /* * Write to quotafile (we know the transaction is already started and has * enough credits) */ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; size_t towrite = len; struct buffer_head tmp_bh, *bh; if (!current->journal_info) { printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n", (unsigned long long)off, (unsigned long long)len); return -EIO; } while (towrite > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); tmp_bh.b_state = 0; reiserfs_write_lock(sb); err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); reiserfs_write_unlock(sb); if (err) goto out; if (offset || tocopy != sb->s_blocksize) bh = sb_bread(sb, tmp_bh.b_blocknr); else bh = sb_getblk(sb, tmp_bh.b_blocknr); if (!bh) { err = -EIO; goto out; } lock_buffer(bh); memcpy(bh->b_data + offset, data, tocopy); flush_dcache_page(bh->b_page); set_buffer_uptodate(bh); unlock_buffer(bh); reiserfs_write_lock(sb); reiserfs_prepare_for_journal(sb, bh, 1); journal_mark_dirty(current->journal_info, bh); if (!journal_quota) reiserfs_add_ordered_list(inode, bh); reiserfs_write_unlock(sb); brelse(bh); offset = 0; towrite -= tocopy; data += tocopy; blk++; } out: if (len == towrite) return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); return len - towrite; } #endif static struct dentry *get_super_block(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); } static int __init init_reiserfs_fs(void) { int ret; ret = init_inodecache(); if (ret) return ret; reiserfs_proc_info_global_init(); ret = register_filesystem(&reiserfs_fs_type); if (ret) goto out; return 0; out: reiserfs_proc_info_global_done(); destroy_inodecache(); return ret; } static void __exit exit_reiserfs_fs(void) { reiserfs_proc_info_global_done(); unregister_filesystem(&reiserfs_fs_type); destroy_inodecache(); } struct file_system_type 
reiserfs_fs_type = { .owner = THIS_MODULE, .name = "reiserfs", .mount = get_super_block, .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("reiserfs"); MODULE_DESCRIPTION("ReiserFS journaled filesystem"); MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>"); MODULE_LICENSE("GPL"); module_init(init_reiserfs_fs); module_exit(exit_reiserfs_fs);
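/*
 * Editor's illustrative sketch (not part of the reiserfs code above): the
 * mount-option tables above pair each recognised value with a setmask and a
 * clrmask, and reiserfs_getopt() applies the clrmask BEFORE the setmask, so a
 * value such as data=journal first clears the competing data-mode bits and
 * then sets its own. The standalone userspace demo below only mirrors that
 * mechanism; demo_arg_desc, demo_apply_value() and the DEMO_* bit names are
 * hypothetical and exist purely for illustration.
 */
#include <stdio.h>

struct demo_arg_desc {
	const char *value;
	unsigned long setmask;	/* bits to set when this value is chosen */
	unsigned long clrmask;	/* bits to clear first (applied BEFORE setmask) */
};

/* mirrors the "-o data=" table: the three data modes are mutually exclusive */
enum { DEMO_DATA_LOG = 0, DEMO_DATA_ORDERED = 1, DEMO_DATA_WRITEBACK = 2 };

static const struct demo_arg_desc demo_logging_mode[] = {
	{ "ordered",   1 << DEMO_DATA_ORDERED,
	  (1 << DEMO_DATA_LOG) | (1 << DEMO_DATA_WRITEBACK) },
	{ "journal",   1 << DEMO_DATA_LOG,
	  (1 << DEMO_DATA_ORDERED) | (1 << DEMO_DATA_WRITEBACK) },
	{ "writeback", 1 << DEMO_DATA_WRITEBACK,
	  (1 << DEMO_DATA_ORDERED) | (1 << DEMO_DATA_LOG) },
	{ NULL, 0, 0 }
};

/* apply one matched value to the mount-option bitmask, clrmask first */
static void demo_apply_value(unsigned long *bits, const struct demo_arg_desc *arg)
{
	*bits &= ~arg->clrmask;
	*bits |= arg->setmask;
}

int main(void)
{
	unsigned long bits = 1 << DEMO_DATA_ORDERED;	/* default: data=ordered */

	demo_apply_value(&bits, &demo_logging_mode[1]);	/* as if "-o data=journal" */
	printf("mount bits after data=journal: %#lx\n", bits);	/* only the LOG bit remains */
	return 0;
}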
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat * * based in parts on udlfb.c: * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ #include <linux/bitfield.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include "udl_drv.h" #include "udl_proto.h" /* * All DisplayLink bulk operations start with 0xaf (UDL_MSG_BULK), followed by * a specific command code. All operations are written to a command buffer, which * the driver sends to the device.
*/ static char *udl_set_register(char *buf, u8 reg, u8 val) { *buf++ = UDL_MSG_BULK; *buf++ = UDL_CMD_WRITEREG; *buf++ = reg; *buf++ = val; return buf; } static char *udl_vidreg_lock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_LOCK); } static char *udl_vidreg_unlock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_UNLOCK); } static char *udl_set_blank_mode(char *buf, u8 mode) { return udl_set_register(buf, UDL_REG_BLANKMODE, mode); } static char *udl_set_color_depth(char *buf, u8 selection) { return udl_set_register(buf, UDL_REG_COLORDEPTH, selection); } static char *udl_set_base16bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x20 is hi byte. */ u8 reg20 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg21 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg22 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR2, reg20); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR1, reg21); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR0, reg22); return buf; } /* * DisplayLink HW has separate 16bpp and 8bpp framebuffers. * In 24bpp modes, the low 323 RGB bits go in the 8bpp framebuffer */ static char *udl_set_base8bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x26 is hi byte. */ u8 reg26 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg27 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg28 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR2, reg26); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR1, reg27); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR0, reg28); return buf; } static char *udl_set_register_16(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value >> 8); return udl_set_register(wrptr, reg+1, value); } /* * This is kind of weird because the controller takes some * register values in a different byte order than other registers. */ static char *udl_set_register_16be(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value); return udl_set_register(wrptr, reg+1, value >> 8); } /* * LFSR is linear feedback shift register. The reason we have this is * because the display controller needs to minimize the clock depth of * various counters used in the display path. So this code reverses the * provided value into the lfsr16 value by counting backwards to get * the value that needs to be set in the hardware comparator to get the * same actual count. This makes sense once you read above a couple of * times and think about it from a hardware perspective. */ static u16 udl_lfsr16(u16 actual_count) { u32 lv = 0xFFFF; /* This is the lfsr value that the hw starts with */ while (actual_count--) { lv = ((lv << 1) | (((lv >> 15) ^ (lv >> 4) ^ (lv >> 2) ^ (lv >> 1)) & 1)) & 0xFFFF; } return (u16) lv; } /* * This does LFSR conversion on the value that is to be written. * See LFSR explanation above for more detail. */ static char *udl_set_register_lfsr16(char *wrptr, u8 reg, u16 value) { return udl_set_register_16(wrptr, reg, udl_lfsr16(value)); } /* * Takes a DRM display mode and converts it into the DisplayLink * equivalent register commands. 
*/ static char *udl_set_display_mode(char *buf, struct drm_display_mode *mode) { u16 reg01 = mode->crtc_htotal - mode->crtc_hsync_start; u16 reg03 = reg01 + mode->crtc_hdisplay; u16 reg05 = mode->crtc_vtotal - mode->crtc_vsync_start; u16 reg07 = reg05 + mode->crtc_vdisplay; u16 reg09 = mode->crtc_htotal - 1; u16 reg0b = 1; /* libdlo hardcodes hsync start to 1 */ u16 reg0d = mode->crtc_hsync_end - mode->crtc_hsync_start + 1; u16 reg0f = mode->hdisplay; u16 reg11 = mode->crtc_vtotal; u16 reg13 = 0; /* libdlo hardcodes vsync start to 0 */ u16 reg15 = mode->crtc_vsync_end - mode->crtc_vsync_start; u16 reg17 = mode->crtc_vdisplay; u16 reg1b = mode->clock / 5; buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYSTART, reg01); buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYEND, reg03); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYSTART, reg05); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYEND, reg07); buf = udl_set_register_lfsr16(buf, UDL_REG_XENDCOUNT, reg09); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCSTART, reg0b); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCEND, reg0d); buf = udl_set_register_16(buf, UDL_REG_HPIXELS, reg0f); buf = udl_set_register_lfsr16(buf, UDL_REG_YENDCOUNT, reg11); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCSTART, reg13); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCEND, reg15); buf = udl_set_register_16(buf, UDL_REG_VPIXELS, reg17); buf = udl_set_register_16be(buf, UDL_REG_PIXELCLOCK5KHZ, reg1b); return buf; } static char *udl_dummy_render(char *wrptr) { *wrptr++ = UDL_MSG_BULK; *wrptr++ = UDL_CMD_WRITECOPY16; *wrptr++ = 0x00; /* from addr */ *wrptr++ = 0x00; *wrptr++ = 0x00; *wrptr++ = 0x01; /* one pixel */ *wrptr++ = 0x00; /* to address */ *wrptr++ = 0x00; *wrptr++ = 0x00; return wrptr; } static long udl_log_cpp(unsigned int cpp) { if (WARN_ON(!is_power_of_2(cpp))) return -EINVAL; return __ffs(cpp); } static int udl_handle_damage(struct drm_framebuffer *fb, const struct iosys_map *map, const struct drm_rect *clip) { struct drm_device *dev = fb->dev; void *vaddr = map->vaddr; /* TODO: Use mapping abstraction properly */ int i, ret; char *cmd; struct urb *urb; int log_bpp; ret = udl_log_cpp(fb->format->cpp[0]); if (ret < 0) return ret; log_bpp = ret; urb = udl_get_urb(dev); if (!urb) return -ENOMEM; cmd = urb->transfer_buffer; for (i = clip->y1; i < clip->y2; i++) { const int line_offset = fb->pitches[0] * i; const int byte_offset = line_offset + (clip->x1 << log_bpp); const int dev_byte_offset = (fb->width * i + clip->x1) << log_bpp; const int byte_width = drm_rect_width(clip) << log_bpp; ret = udl_render_hline(dev, log_bpp, &urb, (char *)vaddr, &cmd, byte_offset, dev_byte_offset, byte_width); if (ret) return ret; } if (cmd > (char *)urb->transfer_buffer) { /* Send partial buffer remaining before exiting */ int len; if (cmd < (char *)urb->transfer_buffer + urb->transfer_buffer_length) *cmd++ = UDL_MSG_BULK; len = cmd - (char *)urb->transfer_buffer; ret = udl_submit_urb(dev, urb, len); } else { udl_urb_completion(urb); } return 0; } /* * Primary plane */ static const uint32_t udl_primary_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, }; static const uint64_t udl_primary_plane_fmtmods[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_device *dev = plane->dev; struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct drm_atomic_helper_damage_iter iter; struct drm_rect damage; int ret, idx; if (!fb) return; /* no framebuffer; plane is disabled */ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return; if (!drm_dev_enter(dev, &idx)) goto out_drm_gem_fb_end_cpu_access; drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); drm_atomic_for_each_plane_damage(&iter, &damage) { udl_handle_damage(fb, &shadow_plane_state->data[0], &damage); } drm_dev_exit(idx); out_drm_gem_fb_end_cpu_access: drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); } static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, .atomic_check = drm_plane_helper_atomic_check, .atomic_update = udl_primary_plane_helper_atomic_update, }; static const struct drm_plane_funcs udl_primary_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, DRM_GEM_SHADOW_PLANE_FUNCS, }; /* * CRTC */ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct drm_display_mode *mode = &crtc_state->mode; struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(dev); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_color_depth(buf, UDL_COLORDEPTH_16BPP); /* set base for 16bpp segment to 0 */ buf = udl_set_base16bpp(buf, 0); /* set base for 8bpp segment to end of fb */ buf = udl_set_base8bpp(buf, 2 * mode->vdisplay * mode->hdisplay); buf = udl_set_display_mode(buf, mode); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_ON); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(dev, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static void udl_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(dev); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_POWERDOWN); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(dev, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static const struct drm_crtc_helper_funcs udl_crtc_helper_funcs = { .atomic_check = drm_crtc_helper_atomic_check, .atomic_enable = udl_crtc_helper_atomic_enable, .atomic_disable = udl_crtc_helper_atomic_disable, }; static const struct drm_crtc_funcs udl_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .destroy = drm_crtc_cleanup, .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; /* * Encoder */ static const struct drm_encoder_funcs udl_encoder_funcs = { .destroy = drm_encoder_cleanup, }; /* * Connector */ static int udl_connector_helper_get_modes(struct drm_connector *connector) { struct udl_connector *udl_connector = to_udl_connector(connector); drm_connector_update_edid_property(connector, udl_connector->edid); if 
(udl_connector->edid) return drm_add_edid_modes(connector, udl_connector->edid); return 0; } static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { .get_modes = udl_connector_helper_get_modes, }; static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, size_t len) { struct udl_device *udl = data; struct drm_device *dev = &udl->drm; struct usb_device *udev = udl_to_usb_device(udl); u8 *read_buff; int ret; size_t i; read_buff = kmalloc(2, GFP_KERNEL); if (!read_buff) return -ENOMEM; for (i = 0; i < len; i++) { int bval = (i + block * EDID_LENGTH) << 8; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x02, (0x80 | (0x02 << 5)), bval, 0xA1, read_buff, 2, USB_CTRL_GET_TIMEOUT); if (ret < 0) { drm_err(dev, "Read EDID byte %zu failed err %x\n", i, ret); goto err_kfree; } else if (ret < 1) { ret = -EIO; drm_err(dev, "Read EDID byte %zu failed\n", i); goto err_kfree; } buf[i] = read_buff[1]; } kfree(read_buff); return 0; err_kfree: kfree(read_buff); return ret; } static enum drm_connector_status udl_connector_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct udl_device *udl = to_udl(dev); struct udl_connector *udl_connector = to_udl_connector(connector); enum drm_connector_status status = connector_status_disconnected; int idx; /* cleanup previous EDID */ kfree(udl_connector->edid); udl_connector->edid = NULL; if (!drm_dev_enter(dev, &idx)) return connector_status_disconnected; udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl); if (udl_connector->edid) status = connector_status_connected; drm_dev_exit(idx); return status; } static void udl_connector_destroy(struct drm_connector *connector) { struct udl_connector *udl_connector = to_udl_connector(connector); drm_connector_cleanup(connector); kfree(udl_connector->edid); kfree(udl_connector); } static const struct drm_connector_funcs udl_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .detect = udl_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = udl_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; struct drm_connector *udl_connector_init(struct drm_device *dev) { struct udl_connector *udl_connector; struct drm_connector *connector; int ret; udl_connector = kzalloc(sizeof(*udl_connector), GFP_KERNEL); if (!udl_connector) return ERR_PTR(-ENOMEM); connector = &udl_connector->connector; ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); if (ret) goto err_kfree; drm_connector_helper_add(connector, &udl_connector_helper_funcs); connector->polled = DRM_CONNECTOR_POLL_HPD | DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return connector; err_kfree: kfree(udl_connector); return ERR_PTR(ret); } /* * Modesetting */ static enum drm_mode_status udl_mode_config_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode) { struct udl_device *udl = to_udl(dev); if (udl->sku_pixel_limit) { if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) return MODE_MEM; } return MODE_OK; } static const struct drm_mode_config_funcs udl_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .mode_valid = udl_mode_config_mode_valid, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; int udl_modeset_init(struct drm_device *dev) { struct udl_device *udl = to_udl(dev); struct drm_plane 
*primary_plane;
	struct drm_crtc *crtc;
	struct drm_encoder *encoder;
	struct drm_connector *connector;
	int ret;

	ret = drmm_mode_config_init(dev);
	if (ret)
		return ret;

	dev->mode_config.min_width = 640;
	dev->mode_config.min_height = 480;
	dev->mode_config.max_width = 2048;
	dev->mode_config.max_height = 2048;
	dev->mode_config.preferred_depth = 16;
	dev->mode_config.funcs = &udl_mode_config_funcs;

	primary_plane = &udl->primary_plane;
	ret = drm_universal_plane_init(dev, primary_plane, 0,
				       &udl_primary_plane_funcs,
				       udl_primary_plane_formats,
				       ARRAY_SIZE(udl_primary_plane_formats),
				       udl_primary_plane_fmtmods,
				       DRM_PLANE_TYPE_PRIMARY, NULL);
	if (ret)
		return ret;
	drm_plane_helper_add(primary_plane, &udl_primary_plane_helper_funcs);
	drm_plane_enable_fb_damage_clips(primary_plane);

	crtc = &udl->crtc;
	ret = drm_crtc_init_with_planes(dev, crtc, primary_plane, NULL,
					&udl_crtc_funcs, NULL);
	if (ret)
		return ret;
	drm_crtc_helper_add(crtc, &udl_crtc_helper_funcs);

	encoder = &udl->encoder;
	ret = drm_encoder_init(dev, encoder, &udl_encoder_funcs,
			       DRM_MODE_ENCODER_DAC, NULL);
	if (ret)
		return ret;
	encoder->possible_crtcs = drm_crtc_mask(crtc);

	connector = udl_connector_init(dev);
	if (IS_ERR(connector))
		return PTR_ERR(connector);

	ret = drm_connector_attach_encoder(connector, encoder);
	if (ret)
		return ret;

	drm_mode_config_reset(dev);

	return 0;
}
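/*
 * Worked example (editorial note, not part of the driver): for the common
 * 1024x768@60 DMT mode (65000 kHz pixel clock, horizontal timings
 * 1024/1048/1184/1344, vertical timings 768/771/777/806), and assuming the
 * crtc_* fields equal the base timings (as they do for a plain progressive,
 * non-doublescan mode), udl_set_display_mode() above computes:
 *
 *	reg01 = 1344 - 1048     = 296	(UDL_REG_XDISPLAYSTART)
 *	reg03 = 296 + 1024      = 1320	(UDL_REG_XDISPLAYEND)
 *	reg05 = 806 - 771       = 35	(UDL_REG_YDISPLAYSTART)
 *	reg07 = 35 + 768        = 803	(UDL_REG_YDISPLAYEND)
 *	reg09 = 1344 - 1        = 1343	(UDL_REG_XENDCOUNT)
 *	reg0d = 1184 - 1048 + 1 = 137	(UDL_REG_HSYNCEND)
 *	reg0f = 1024, reg11 = 806, reg15 = 777 - 771 = 6, reg17 = 768
 *	reg1b = 65000 / 5       = 13000	(UDL_REG_PIXELCLOCK5KHZ, in 5 kHz units)
 */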
/* * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses.
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/gfp.h> #include <net/sock.h> #include <linux/in.h> #include <linux/list.h> #include <linux/ratelimit.h> #include <linux/export.h> #include <linux/sizes.h> #include "rds.h" /* When transmitting messages in rds_send_xmit, we need to emerge from * time to time and briefly release the CPU. Otherwise the softlock watchdog * will kick our shin. * Also, it seems fairer to not let one busy connection stall all the * others. * * send_batch_count is the number of times we'll loop in send_xmit. Setting * it to 0 will restore the old behavior (where we looped until we had * drained the queue). */ static int send_batch_count = SZ_1K; module_param(send_batch_count, int, 0444); MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); static void rds_send_remove_from_sock(struct list_head *messages, int status); /* * Reset the send state. Callers must ensure that this doesn't race with * rds_send_xmit(). */ void rds_send_path_reset(struct rds_conn_path *cp) { struct rds_message *rm, *tmp; unsigned long flags; if (cp->cp_xmit_rm) { rm = cp->cp_xmit_rm; cp->cp_xmit_rm = NULL; /* Tell the user the RDMA op is no longer mapped by the * transport. 
This isn't entirely true (it's flushed out * independently) but as the connection is down, there's * no ongoing RDMA to/from that memory */ rds_message_unmapped(rm); rds_message_put(rm); } cp->cp_xmit_sg = 0; cp->cp_xmit_hdr_off = 0; cp->cp_xmit_data_off = 0; cp->cp_xmit_atomic_sent = 0; cp->cp_xmit_rdma_sent = 0; cp->cp_xmit_data_sent = 0; cp->cp_conn->c_map_queued = 0; cp->cp_unacked_packets = rds_sysctl_max_unacked_packets; cp->cp_unacked_bytes = rds_sysctl_max_unacked_bytes; /* Mark messages as retransmissions, and move them to the send q */ spin_lock_irqsave(&cp->cp_lock, flags); list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) { set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags); } list_splice_init(&cp->cp_retrans, &cp->cp_send_queue); spin_unlock_irqrestore(&cp->cp_lock, flags); } EXPORT_SYMBOL_GPL(rds_send_path_reset); static int acquire_in_xmit(struct rds_conn_path *cp) { return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0; } static void release_in_xmit(struct rds_conn_path *cp) { clear_bit(RDS_IN_XMIT, &cp->cp_flags); smp_mb__after_atomic(); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk * the system-wide hashed waitqueue buckets in the fast path only to * almost never find waiters. */ if (waitqueue_active(&cp->cp_waitq)) wake_up_all(&cp->cp_waitq); } /* * We're making the conscious trade-off here to only send one message * down the connection at a time. * Pro: * - tx queueing is a simple fifo list * - reassembly is optional and easily done by transports per conn * - no per flow rx lookup at all, straight to the socket * - less per-frag memory and wire overhead * Con: * - queued acks can be delayed behind large messages * Depends: * - small message latency is higher behind queued large messages * - large message latency isn't starved by intervening small sends */ int rds_send_xmit(struct rds_conn_path *cp) { struct rds_connection *conn = cp->cp_conn; struct rds_message *rm; unsigned long flags; unsigned int tmp; struct scatterlist *sg; int ret = 0; LIST_HEAD(to_be_dropped); int batch_count; unsigned long send_gen = 0; int same_rm = 0; restart: batch_count = 0; /* * sendmsg calls here after having queued its message on the send * queue. We only have one task feeding the connection at a time. If * another thread is already feeding the queue then we back off. This * avoids blocking the caller and trading per-connection data between * caches per message. */ if (!acquire_in_xmit(cp)) { rds_stats_inc(s_send_lock_contention); ret = -ENOMEM; goto out; } if (rds_destroy_pending(cp->cp_conn)) { release_in_xmit(cp); ret = -ENETUNREACH; /* dont requeue send work */ goto out; } /* * we record the send generation after doing the xmit acquire. * if someone else manages to jump in and do some work, we'll use * this to avoid a goto restart farther down. * * The acquire_in_xmit() check above ensures that only one * caller can increment c_send_gen at any time. */ send_gen = READ_ONCE(cp->cp_send_gen) + 1; WRITE_ONCE(cp->cp_send_gen, send_gen); /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, * we do the opposite to avoid races. */ if (!rds_conn_path_up(cp)) { release_in_xmit(cp); ret = 0; goto out; } if (conn->c_trans->xmit_path_prepare) conn->c_trans->xmit_path_prepare(cp); /* * spin trying to push headers and data down the connection until * the connection doesn't make forward progress. 
*/ while (1) { rm = cp->cp_xmit_rm; if (!rm) { same_rm = 0; } else { same_rm++; if (same_rm >= 4096) { rds_stats_inc(s_send_stuck_rm); ret = -EAGAIN; break; } } /* * If between sending messages, we can send a pending congestion * map update. */ if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) { rm = rds_cong_update_alloc(conn); if (IS_ERR(rm)) { ret = PTR_ERR(rm); break; } rm->data.op_active = 1; rm->m_inc.i_conn_path = cp; rm->m_inc.i_conn = cp->cp_conn; cp->cp_xmit_rm = rm; } /* * If not already working on one, grab the next message. * * cp_xmit_rm holds a ref while we're sending this message down * the connction. We can use this ref while holding the * send_sem.. rds_send_reset() is serialized with it. */ if (!rm) { unsigned int len; batch_count++; /* we want to process as big a batch as we can, but * we also want to avoid softlockups. If we've been * through a lot of messages, lets back off and see * if anyone else jumps in */ if (batch_count >= send_batch_count) goto over_batch; spin_lock_irqsave(&cp->cp_lock, flags); if (!list_empty(&cp->cp_send_queue)) { rm = list_entry(cp->cp_send_queue.next, struct rds_message, m_conn_item); rds_message_addref(rm); /* * Move the message from the send queue to the retransmit * list right away. */ list_move_tail(&rm->m_conn_item, &cp->cp_retrans); } spin_unlock_irqrestore(&cp->cp_lock, flags); if (!rm) break; /* Unfortunately, the way Infiniband deals with * RDMA to a bad MR key is by moving the entire * queue pair to error state. We could possibly * recover from that, but right now we drop the * connection. * Therefore, we never retransmit messages with RDMA ops. */ if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) || (rm->rdma.op_active && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) { spin_lock_irqsave(&cp->cp_lock, flags); if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) list_move(&rm->m_conn_item, &to_be_dropped); spin_unlock_irqrestore(&cp->cp_lock, flags); continue; } /* Require an ACK every once in a while */ len = ntohl(rm->m_inc.i_hdr.h_len); if (cp->cp_unacked_packets == 0 || cp->cp_unacked_bytes < len) { set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); cp->cp_unacked_packets = rds_sysctl_max_unacked_packets; cp->cp_unacked_bytes = rds_sysctl_max_unacked_bytes; rds_stats_inc(s_send_ack_required); } else { cp->cp_unacked_bytes -= len; cp->cp_unacked_packets--; } cp->cp_xmit_rm = rm; } /* The transport either sends the whole rdma or none of it */ if (rm->rdma.op_active && !cp->cp_xmit_rdma_sent) { rm->m_final_op = &rm->rdma; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); if (ret) { clear_bit(RDS_MSG_MAPPED, &rm->m_flags); wake_up_interruptible(&rm->m_flush_wait); break; } cp->cp_xmit_rdma_sent = 1; } if (rm->atomic.op_active && !cp->cp_xmit_atomic_sent) { rm->m_final_op = &rm->atomic; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); if (ret) { clear_bit(RDS_MSG_MAPPED, &rm->m_flags); wake_up_interruptible(&rm->m_flush_wait); break; } cp->cp_xmit_atomic_sent = 1; } /* * A number of cases require an RDS header to be sent * even if there is no data. * We permit 0-byte sends; rds-ping depends on this. * However, if there are exclusively attached silent ops, * we skip the hdr/data send, to enable silent operation. 
*/ if (rm->data.op_nents == 0) { int ops_present; int all_ops_are_silent = 1; ops_present = (rm->atomic.op_active || rm->rdma.op_active); if (rm->atomic.op_active && !rm->atomic.op_silent) all_ops_are_silent = 0; if (rm->rdma.op_active && !rm->rdma.op_silent) all_ops_are_silent = 0; if (ops_present && all_ops_are_silent && !rm->m_rdma_cookie) rm->data.op_active = 0; } if (rm->data.op_active && !cp->cp_xmit_data_sent) { rm->m_final_op = &rm->data; ret = conn->c_trans->xmit(conn, rm, cp->cp_xmit_hdr_off, cp->cp_xmit_sg, cp->cp_xmit_data_off); if (ret <= 0) break; if (cp->cp_xmit_hdr_off < sizeof(struct rds_header)) { tmp = min_t(int, ret, sizeof(struct rds_header) - cp->cp_xmit_hdr_off); cp->cp_xmit_hdr_off += tmp; ret -= tmp; } sg = &rm->data.op_sg[cp->cp_xmit_sg]; while (ret) { tmp = min_t(int, ret, sg->length - cp->cp_xmit_data_off); cp->cp_xmit_data_off += tmp; ret -= tmp; if (cp->cp_xmit_data_off == sg->length) { cp->cp_xmit_data_off = 0; sg++; cp->cp_xmit_sg++; BUG_ON(ret != 0 && cp->cp_xmit_sg == rm->data.op_nents); } } if (cp->cp_xmit_hdr_off == sizeof(struct rds_header) && (cp->cp_xmit_sg == rm->data.op_nents)) cp->cp_xmit_data_sent = 1; } /* * A rm will only take multiple times through this loop * if there is a data op. Thus, if the data is sent (or there was * none), then we're done with the rm. */ if (!rm->data.op_active || cp->cp_xmit_data_sent) { cp->cp_xmit_rm = NULL; cp->cp_xmit_sg = 0; cp->cp_xmit_hdr_off = 0; cp->cp_xmit_data_off = 0; cp->cp_xmit_rdma_sent = 0; cp->cp_xmit_atomic_sent = 0; cp->cp_xmit_data_sent = 0; rds_message_put(rm); } } over_batch: if (conn->c_trans->xmit_path_complete) conn->c_trans->xmit_path_complete(cp); release_in_xmit(cp); /* Nuke any messages we decided not to retransmit. */ if (!list_empty(&to_be_dropped)) { /* irqs on here, so we can put(), unlike above */ list_for_each_entry(rm, &to_be_dropped, m_conn_item) rds_message_put(rm); rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); } /* * Other senders can queue a message after we last test the send queue * but before we clear RDS_IN_XMIT. In that case they'd back off and * not try and send their newly queued message. We need to check the * send queue after having cleared RDS_IN_XMIT so that their message * doesn't get stuck on the send queue. * * If the transport cannot continue (i.e ret != 0), then it must * call us when more room is available, such as from the tx * completion handler. 
* * We have an extra generation check here so that if someone manages * to jump in after our release_in_xmit, we'll see that they have done * some work and we will skip our goto */ if (ret == 0) { bool raced; smp_mb(); raced = send_gen != READ_ONCE(cp->cp_send_gen); if ((test_bit(0, &conn->c_map_queued) || !list_empty(&cp->cp_send_queue)) && !raced) { if (batch_count < send_batch_count) goto restart; rcu_read_lock(); if (rds_destroy_pending(cp->cp_conn)) ret = -ENETUNREACH; else queue_delayed_work(rds_wq, &cp->cp_send_w, 1); rcu_read_unlock(); } else if (raced) { rds_stats_inc(s_send_lock_queue_raced); } } out: return ret; } EXPORT_SYMBOL_GPL(rds_send_xmit); static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm) { u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len); assert_spin_locked(&rs->rs_lock); BUG_ON(rs->rs_snd_bytes < len); rs->rs_snd_bytes -= len; if (rs->rs_snd_bytes == 0) rds_stats_inc(s_send_queue_empty); } static inline int rds_send_is_acked(struct rds_message *rm, u64 ack, is_acked_func is_acked) { if (is_acked) return is_acked(rm, ack); return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack; } /* * This is pretty similar to what happens below in the ACK * handling code - except that we call here as soon as we get * the IB send completion on the RDMA op and the accompanying * message. */ void rds_rdma_send_complete(struct rds_message *rm, int status) { struct rds_sock *rs = NULL; struct rm_rdma_op *ro; struct rds_notifier *notifier; unsigned long flags; spin_lock_irqsave(&rm->m_rs_lock, flags); ro = &rm->rdma; if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && ro->op_active && ro->op_notify && ro->op_notifier) { notifier = ro->op_notifier; rs = rm->m_rs; sock_hold(rds_rs_to_sk(rs)); notifier->n_status = status; spin_lock(&rs->rs_lock); list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); spin_unlock(&rs->rs_lock); ro->op_notifier = NULL; } spin_unlock_irqrestore(&rm->m_rs_lock, flags); if (rs) { rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } } EXPORT_SYMBOL_GPL(rds_rdma_send_complete); /* * Just like above, except looks at atomic op */ void rds_atomic_send_complete(struct rds_message *rm, int status) { struct rds_sock *rs = NULL; struct rm_atomic_op *ao; struct rds_notifier *notifier; unsigned long flags; spin_lock_irqsave(&rm->m_rs_lock, flags); ao = &rm->atomic; if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && ao->op_active && ao->op_notify && ao->op_notifier) { notifier = ao->op_notifier; rs = rm->m_rs; sock_hold(rds_rs_to_sk(rs)); notifier->n_status = status; spin_lock(&rs->rs_lock); list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); spin_unlock(&rs->rs_lock); ao->op_notifier = NULL; } spin_unlock_irqrestore(&rm->m_rs_lock, flags); if (rs) { rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } } EXPORT_SYMBOL_GPL(rds_atomic_send_complete); /* * This is the same as rds_rdma_send_complete except we * don't do any locking - we have all the ingredients (message, * socket, socket lock) and can just move the notifier. 
*/ static inline void __rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) { struct rm_rdma_op *ro; struct rm_atomic_op *ao; ro = &rm->rdma; if (ro->op_active && ro->op_notify && ro->op_notifier) { ro->op_notifier->n_status = status; list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue); ro->op_notifier = NULL; } ao = &rm->atomic; if (ao->op_active && ao->op_notify && ao->op_notifier) { ao->op_notifier->n_status = status; list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue); ao->op_notifier = NULL; } /* No need to wake the app - caller does this */ } /* * This removes messages from the socket's list if they're on it. The list * argument must be private to the caller, we must be able to modify it * without locks. The messages must have a reference held for their * position on the list. This function will drop that reference after * removing the messages from the 'messages' list regardless of if it found * the messages on the socket list or not. */ static void rds_send_remove_from_sock(struct list_head *messages, int status) { unsigned long flags; struct rds_sock *rs = NULL; struct rds_message *rm; while (!list_empty(messages)) { int was_on_sock = 0; rm = list_entry(messages->next, struct rds_message, m_conn_item); list_del_init(&rm->m_conn_item); /* * If we see this flag cleared then we're *sure* that someone * else beat us to removing it from the sock. If we race * with their flag update we'll get the lock and then really * see that the flag has been cleared. * * The message spinlock makes sure nobody clears rm->m_rs * while we're messing with it. It does not prevent the * message from being removed from the socket, though. */ spin_lock_irqsave(&rm->m_rs_lock, flags); if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) goto unlock_and_drop; if (rs != rm->m_rs) { if (rs) { rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } rs = rm->m_rs; if (rs) sock_hold(rds_rs_to_sk(rs)); } if (!rs) goto unlock_and_drop; spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { struct rm_rdma_op *ro = &rm->rdma; struct rds_notifier *notifier; list_del_init(&rm->m_sock_item); rds_send_sndbuf_remove(rs, rm); if (ro->op_active && ro->op_notifier && (ro->op_notify || (ro->op_recverr && status))) { notifier = ro->op_notifier; list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); if (!notifier->n_status) notifier->n_status = status; rm->rdma.op_notifier = NULL; } was_on_sock = 1; } spin_unlock(&rs->rs_lock); unlock_and_drop: spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); if (was_on_sock) rds_message_put(rm); } if (rs) { rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } } /* * Transports call here when they've determined that the receiver queued * messages up to, and including, the given sequence number. Messages are * moved to the retrans queue when rds_send_xmit picks them off the send * queue. This means that in the TCP case, the message may not have been * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked * checks the RDS_MSG_HAS_ACK_SEQ bit. 
*/ void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack, is_acked_func is_acked) { struct rds_message *rm, *tmp; unsigned long flags; LIST_HEAD(list); spin_lock_irqsave(&cp->cp_lock, flags); list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) { if (!rds_send_is_acked(rm, ack, is_acked)) break; list_move(&rm->m_conn_item, &list); clear_bit(RDS_MSG_ON_CONN, &rm->m_flags); } /* order flag updates with spin locks */ if (!list_empty(&list)) smp_mb__after_atomic(); spin_unlock_irqrestore(&cp->cp_lock, flags); /* now remove the messages from the sock list as needed */ rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS); } EXPORT_SYMBOL_GPL(rds_send_path_drop_acked); void rds_send_drop_acked(struct rds_connection *conn, u64 ack, is_acked_func is_acked) { WARN_ON(conn->c_trans->t_mp_capable); rds_send_path_drop_acked(&conn->c_path[0], ack, is_acked); } EXPORT_SYMBOL_GPL(rds_send_drop_acked); void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest) { struct rds_message *rm, *tmp; struct rds_connection *conn; struct rds_conn_path *cp; unsigned long flags; LIST_HEAD(list); /* get all the messages we're dropping under the rs lock */ spin_lock_irqsave(&rs->rs_lock, flags); list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { if (dest && (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) || dest->sin6_port != rm->m_inc.i_hdr.h_dport)) continue; list_move(&rm->m_sock_item, &list); rds_send_sndbuf_remove(rs, rm); clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); } /* order flag updates with the rs lock */ smp_mb__after_atomic(); spin_unlock_irqrestore(&rs->rs_lock, flags); if (list_empty(&list)) return; /* Remove the messages from the conn */ list_for_each_entry(rm, &list, m_sock_item) { conn = rm->m_inc.i_conn; if (conn->c_trans->t_mp_capable) cp = rm->m_inc.i_conn_path; else cp = &conn->c_path[0]; spin_lock_irqsave(&cp->cp_lock, flags); /* * Maybe someone else beat us to removing rm from the conn. * If we race with their flag update we'll get the lock and * then really see that the flag has been cleared. */ if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { spin_unlock_irqrestore(&cp->cp_lock, flags); continue; } list_del_init(&rm->m_conn_item); spin_unlock_irqrestore(&cp->cp_lock, flags); /* * Couldn't grab m_rs_lock in top loop (lock ordering), * but we can now. */ spin_lock_irqsave(&rm->m_rs_lock, flags); spin_lock(&rs->rs_lock); __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); } rds_wake_sk_sleep(rs); while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); rds_message_wait(rm); /* just in case the code above skipped this message * because RDS_MSG_ON_CONN wasn't set, run it again here * taking m_rs_lock is the only thing that keeps us * from racing with ack processing. */ spin_lock_irqsave(&rm->m_rs_lock, flags); spin_lock(&rs->rs_lock); __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); } } /* * we only want this to fire once so we use the callers 'queued'. It's * possible that another thread can race with us and remove the * message from the flow with RDS_CANCEL_SENT_TO. 
*/ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, struct rds_conn_path *cp, struct rds_message *rm, __be16 sport, __be16 dport, int *queued) { unsigned long flags; u32 len; if (*queued) goto out; len = be32_to_cpu(rm->m_inc.i_hdr.h_len); /* this is the only place which holds both the socket's rs_lock * and the connection's c_lock */ spin_lock_irqsave(&rs->rs_lock, flags); /* * If there is a little space in sndbuf, we don't queue anything, * and userspace gets -EAGAIN. But poll() indicates there's send * room. This can lead to bad behavior (spinning) if snd_bytes isn't * freed up by incoming acks. So we check the *old* value of * rs_snd_bytes here to allow the last msg to exceed the buffer, * and poll() now knows no more data can be sent. */ if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) { rs->rs_snd_bytes += len; /* let recv side know we are close to send space exhaustion. * This is probably not the optimal way to do it, as this * means we set the flag on *all* messages as soon as our * throughput hits a certain threshold. */ if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2) set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); rds_message_addref(rm); sock_hold(rds_rs_to_sk(rs)); rm->m_rs = rs; /* The code ordering is a little weird, but we're trying to minimize the time we hold c_lock */ rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0); rm->m_inc.i_conn = conn; rm->m_inc.i_conn_path = cp; rds_message_addref(rm); spin_lock(&cp->cp_lock); rm->m_inc.i_hdr.h_sequence = cpu_to_be64(cp->cp_next_tx_seq++); list_add_tail(&rm->m_conn_item, &cp->cp_send_queue); set_bit(RDS_MSG_ON_CONN, &rm->m_flags); spin_unlock(&cp->cp_lock); rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n", rm, len, rs, rs->rs_snd_bytes, (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence)); *queued = 1; } spin_unlock_irqrestore(&rs->rs_lock, flags); out: return *queued; } /* * rds_message is getting to be quite complicated, and we'd like to allocate * it all in one go. This figures out how big it needs to be up front. 
*/ static int rds_rm_size(struct msghdr *msg, int num_sgs, struct rds_iov_vector_arr *vct) { struct cmsghdr *cmsg; int size = 0; int cmsg_groups = 0; int retval; bool zcopy_cookie = false; struct rds_iov_vector *iov, *tmp_iov; if (num_sgs < 0) return -EINVAL; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_RDS) continue; switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: if (vct->indx >= vct->len) { vct->len += vct->incr; tmp_iov = krealloc(vct->vec, vct->len * sizeof(struct rds_iov_vector), GFP_KERNEL); if (!tmp_iov) { vct->len -= vct->incr; return -ENOMEM; } vct->vec = tmp_iov; } iov = &vct->vec[vct->indx]; memset(iov, 0, sizeof(struct rds_iov_vector)); vct->indx++; cmsg_groups |= 1; retval = rds_rdma_extra_size(CMSG_DATA(cmsg), iov); if (retval < 0) return retval; size += retval; break; case RDS_CMSG_ZCOPY_COOKIE: zcopy_cookie = true; fallthrough; case RDS_CMSG_RDMA_DEST: case RDS_CMSG_RDMA_MAP: cmsg_groups |= 2; /* these are valid but do no add any size */ break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: case RDS_CMSG_MASKED_ATOMIC_CSWP: case RDS_CMSG_MASKED_ATOMIC_FADD: cmsg_groups |= 1; size += sizeof(struct scatterlist); break; default: return -EINVAL; } } if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie) return -EINVAL; size += num_sgs * sizeof(struct scatterlist); /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */ if (cmsg_groups == 3) return -EINVAL; return size; } static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg) { u32 *cookie; if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) || !rm->data.op_mmp_znotifier) return -EINVAL; cookie = CMSG_DATA(cmsg); rm->data.op_mmp_znotifier->z_cookie = *cookie; return 0; } static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, struct msghdr *msg, int *allocated_mr, struct rds_iov_vector_arr *vct) { struct cmsghdr *cmsg; int ret = 0, ind = 0; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_RDS) continue; /* As a side effect, RDMA_DEST and RDMA_MAP will set * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr. */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: if (ind >= vct->indx) return -ENOMEM; ret = rds_cmsg_rdma_args(rs, rm, cmsg, &vct->vec[ind]); ind++; break; case RDS_CMSG_RDMA_DEST: ret = rds_cmsg_rdma_dest(rs, rm, cmsg); break; case RDS_CMSG_RDMA_MAP: ret = rds_cmsg_rdma_map(rs, rm, cmsg); if (!ret) *allocated_mr = 1; else if (ret == -ENODEV) /* Accommodate the get_mr() case which can fail * if connection isn't established yet. */ ret = -EAGAIN; break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: case RDS_CMSG_MASKED_ATOMIC_CSWP: case RDS_CMSG_MASKED_ATOMIC_FADD: ret = rds_cmsg_atomic(rs, rm, cmsg); break; case RDS_CMSG_ZCOPY_COOKIE: ret = rds_cmsg_zcopy(rs, rm, cmsg); break; default: return -EINVAL; } if (ret) break; } return ret; } static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn, int nonblock) { int hash; if (conn->c_npaths == 0) hash = RDS_MPATH_HASH(rs, RDS_MPATH_WORKERS); else hash = RDS_MPATH_HASH(rs, conn->c_npaths); if (conn->c_npaths == 0 && hash != 0) { rds_send_ping(conn, 0); /* The underlying connection is not up yet. Need to wait * until it is up to be sure that the non-zero c_path can be * used. But if we are interrupted, we have to use the zero * c_path in case the connection ends up being non-MP capable. 
*/ if (conn->c_npaths == 0) { /* Cannot wait for the connection be made, so just use * the base c_path. */ if (nonblock) return 0; if (wait_event_interruptible(conn->c_hs_waitq, conn->c_npaths != 0)) hash = 0; } if (conn->c_npaths == 1) hash = 0; } return hash; } static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) { struct rds_rdma_args *args; struct cmsghdr *cmsg; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_RDS) continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))) return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } } return 0; } int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); __be16 dport; struct rds_message *rm = NULL; struct rds_connection *conn; int ret = 0; int queued = 0, allocated_mr = 0; int nonblock = msg->msg_flags & MSG_DONTWAIT; long timeo = sock_sndtimeo(sk, nonblock); struct rds_conn_path *cpath; struct in6_addr daddr; __u32 scope_id = 0; size_t rdma_payload_len = 0; bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); int num_sgs = DIV_ROUND_UP(payload_len, PAGE_SIZE); int namelen; struct rds_iov_vector_arr vct; int ind; memset(&vct, 0, sizeof(vct)); /* expect 1 RDMA CMSG per rds_sendmsg. can still grow if more needed. */ vct.incr = 1; /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) { ret = -EOPNOTSUPP; goto out; } namelen = msg->msg_namelen; if (namelen != 0) { if (namelen < sizeof(*usin)) { ret = -EINVAL; goto out; } switch (usin->sin_family) { case AF_INET: if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || ipv4_is_multicast(usin->sin_addr.s_addr)) { ret = -EINVAL; goto out; } ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr); dport = usin->sin_port; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { int addr_type; if (namelen < sizeof(*sin6)) { ret = -EINVAL; goto out; } addr_type = ipv6_addr_type(&sin6->sin6_addr); if (!(addr_type & IPV6_ADDR_UNICAST)) { __be32 addr4; if (!(addr_type & IPV6_ADDR_MAPPED)) { ret = -EINVAL; goto out; } /* It is a mapped address. Need to do some * sanity checks. */ addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || ipv4_is_multicast(addr4)) { ret = -EINVAL; goto out; } } if (addr_type & IPV6_ADDR_LINKLOCAL) { if (sin6->sin6_scope_id == 0) { ret = -EINVAL; goto out; } scope_id = sin6->sin6_scope_id; } daddr = sin6->sin6_addr; dport = sin6->sin6_port; break; } #endif default: ret = -EINVAL; goto out; } } else { /* We only care about consistency with ->connect() */ lock_sock(sk); daddr = rs->rs_conn_addr; dport = rs->rs_conn_port; scope_id = rs->rs_bound_scope_id; release_sock(sk); } lock_sock(sk); if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) { release_sock(sk); ret = -ENOTCONN; goto out; } else if (namelen != 0) { /* Cannot send to an IPv4 address using an IPv6 source * address and cannot send to an IPv6 address using an * IPv4 source address. 
*/ if (ipv6_addr_v4mapped(&daddr) ^ ipv6_addr_v4mapped(&rs->rs_bound_addr)) { release_sock(sk); ret = -EOPNOTSUPP; goto out; } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { scope_id = rs->rs_bound_scope_id; } else if (rs->rs_bound_scope_id) { release_sock(sk); ret = -EINVAL; goto out; } } } release_sock(sk); ret = rds_rdma_bytes(msg, &rdma_payload_len); if (ret) goto out; if (max_t(size_t, payload_len, rdma_payload_len) > RDS_MAX_MSG_SIZE) { ret = -EMSGSIZE; goto out; } if (payload_len > rds_sk_sndbuf(rs)) { ret = -EMSGSIZE; goto out; } if (zcopy) { if (rs->rs_transport->t_type != RDS_TRANS_TCP) { ret = -EOPNOTSUPP; goto out; } num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX); } /* size of rm including all sgs */ ret = rds_rm_size(msg, num_sgs, &vct); if (ret < 0) goto out; rm = rds_message_alloc(ret, GFP_KERNEL); if (!rm) { ret = -ENOMEM; goto out; } /* Attach data to the rm */ if (payload_len) { rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); if (IS_ERR(rm->data.op_sg)) { ret = PTR_ERR(rm->data.op_sg); goto out; } ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; } rm->data.op_active = 1; rm->m_daddr = daddr; /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) && rs->rs_tos == rs->rs_conn->c_tos) { conn = rs->rs_conn; } else { conn = rds_conn_create_outgoing(sock_net(sock->sk), &rs->rs_bound_addr, &daddr, rs->rs_transport, rs->rs_tos, sock->sk->sk_allocation, scope_id); if (IS_ERR(conn)) { ret = PTR_ERR(conn); goto out; } rs->rs_conn = conn; } if (conn->c_trans->t_mp_capable) cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)]; else cpath = &conn->c_path[0]; rm->m_conn_path = cpath; /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); if (ret) { /* Trigger connection so that its ready for the next retry */ if (ret == -EAGAIN) rds_conn_connect_if_down(conn); goto out; } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); ret = -EOPNOTSUPP; goto out; } if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { printk_ratelimited(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; } if (rds_destroy_pending(conn)) { ret = -EAGAIN; goto out; } if (rds_conn_path_down(cpath)) rds_check_all_paths(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); if (ret) { rs->rs_seen_congestion = 1; goto out; } while (!rds_send_queue_rm(rs, conn, cpath, rm, rs->rs_bound_port, dport, &queued)) { rds_stats_inc(s_send_queue_full); if (nonblock) { ret = -EAGAIN; goto out; } timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, cpath, rm, rs->rs_bound_port, dport, &queued), timeo); rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo); if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) continue; ret = timeo; if (ret == 0) ret = -ETIMEDOUT; goto out; } /* * By now we've committed to the send. We reuse rds_send_worker() * to retry sends in the rds thread if the transport asks us to. 
*/ rds_stats_inc(s_send_queued); ret = rds_send_xmit(cpath); if (ret == -ENOMEM || ret == -EAGAIN) { ret = 0; rcu_read_lock(); if (rds_destroy_pending(cpath->cp_conn)) ret = -ENETUNREACH; else queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); rcu_read_unlock(); } if (ret) goto out; rds_message_put(rm); for (ind = 0; ind < vct.indx; ind++) kfree(vct.vec[ind].iov); kfree(vct.vec); return payload_len; out: for (ind = 0; ind < vct.indx; ind++) kfree(vct.vec[ind].iov); kfree(vct.vec); /* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly. * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN * or in any other way, we need to destroy the MR again */ if (allocated_mr) rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1); if (rm) rds_message_put(rm); return ret; } /* * send out a probe. Can be shared by rds_send_ping, * rds_send_pong, rds_send_hb. * rds_send_hb should use h_flags * RDS_FLAG_HB_PING|RDS_FLAG_ACK_REQUIRED * or * RDS_FLAG_HB_PONG|RDS_FLAG_ACK_REQUIRED */ static int rds_send_probe(struct rds_conn_path *cp, __be16 sport, __be16 dport, u8 h_flags) { struct rds_message *rm; unsigned long flags; int ret = 0; rm = rds_message_alloc(0, GFP_ATOMIC); if (!rm) { ret = -ENOMEM; goto out; } rm->m_daddr = cp->cp_conn->c_faddr; rm->data.op_active = 1; rds_conn_path_connect_if_down(cp); ret = rds_cong_wait(cp->cp_conn->c_fcong, dport, 1, NULL); if (ret) goto out; spin_lock_irqsave(&cp->cp_lock, flags); list_add_tail(&rm->m_conn_item, &cp->cp_send_queue); set_bit(RDS_MSG_ON_CONN, &rm->m_flags); rds_message_addref(rm); rm->m_inc.i_conn = cp->cp_conn; rm->m_inc.i_conn_path = cp; rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, cp->cp_next_tx_seq); rm->m_inc.i_hdr.h_flags |= h_flags; cp->cp_next_tx_seq++; if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) && cp->cp_conn->c_trans->t_mp_capable) { u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS); u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS, &npaths, sizeof(npaths)); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_GEN_NUM, &my_gen_num, sizeof(u32)); } spin_unlock_irqrestore(&cp->cp_lock, flags); rds_stats_inc(s_send_queued); rds_stats_inc(s_send_pong); /* schedule the send work on rds_wq */ rcu_read_lock(); if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_send_w, 1); rcu_read_unlock(); rds_message_put(rm); return 0; out: if (rm) rds_message_put(rm); return ret; } int rds_send_pong(struct rds_conn_path *cp, __be16 dport) { return rds_send_probe(cp, 0, dport, 0); } void rds_send_ping(struct rds_connection *conn, int cp_index) { unsigned long flags; struct rds_conn_path *cp = &conn->c_path[cp_index]; spin_lock_irqsave(&cp->cp_lock, flags); if (conn->c_ping_triggered) { spin_unlock_irqrestore(&cp->cp_lock, flags); return; } conn->c_ping_triggered = 1; spin_unlock_irqrestore(&cp->cp_lock, flags); rds_send_probe(cp, cpu_to_be16(RDS_FLAG_PROBE_PORT), 0, 0); } EXPORT_SYMBOL_GPL(rds_send_ping); |
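/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * the rds_sendmsg() path above services ordinary datagram sends on an RDS
 * socket. Assuming AF_RDS is visible to userspace (via <sys/socket.h>, or a
 * local define of 21 otherwise) and the rds module is loaded, a minimal
 * sender looks roughly like:
 *
 *	int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
 *
 *	struct sockaddr_in laddr = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 *		.sin_port = htons(4000),
 *	};
 *	bind(fd, (struct sockaddr *)&laddr, sizeof(laddr));
 *
 *	struct sockaddr_in daddr = laddr;
 *	daddr.sin_port = htons(4001);
 *	sendto(fd, "ping", 4, 0, (struct sockaddr *)&daddr, sizeof(daddr));
 *
 * The socket must be bound before sending (see the -ENOTCONN check in
 * rds_sendmsg() above). The sendto() lands in rds_sendmsg(), which queues
 * the message via rds_send_queue_rm() and then kicks rds_send_xmit() on the
 * chosen connection path.
 */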
// SPDX-License-Identifier: GPL-2.0-or-later /* auditsc.c -- System-call auditing
support * Handles all system-call specific auditing features. * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * Copyright 2005 Hewlett-Packard Development Company, L.P. * Copyright (C) 2005, 2006 IBM Corporation * All Rights Reserved. * * Written by Rickard E. (Rik) Faith <faith@redhat.com> * * Many of the ideas implemented here are from Stephen C. Tweedie, * especially the idea of avoiding a copy by using getname. * * The method for actual interception of syscall entry and exit (not in * this file -- see entry.S) is based on a GPL'd patch written by * okir@suse.de and Copyright 2003 SuSE Linux AG. * * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>, * 2006. * * The support of additional filter rules compares (>, <, >=, <=) was * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. * * Modified by Amy Griffis <amy.griffis@hp.com> to collect additional * filesystem information. * * Subject and object context labeling support added by <danjones@us.ibm.com> * and <dustin.kirkland@us.ibm.com> for LSPP certification compliance. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <asm/types.h> #include <linux/atomic.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/mount.h> #include <linux/socket.h> #include <linux/mqueue.h> #include <linux/audit.h> #include <linux/personality.h> #include <linux/time.h> #include <linux/netlink.h> #include <linux/compiler.h> #include <asm/unistd.h> #include <linux/security.h> #include <linux/list.h> #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/syscalls.h> #include <asm/syscall.h> #include <linux/capability.h> #include <linux/fs_struct.h> #include <linux/compat.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/fsnotify_backend.h> #include <uapi/linux/limits.h> #include <uapi/linux/netfilter/nf_tables.h> #include <uapi/linux/openat2.h> // struct open_how #include <uapi/linux/fanotify.h> #include "audit.h" /* flags stating the success for a syscall */ #define AUDITSC_INVALID 0 #define AUDITSC_SUCCESS 1 #define AUDITSC_FAILURE 2 /* no execve audit message should be longer than this (userspace limits), * see the note near the top of audit_log_execve_info() about this value */ #define MAX_EXECVE_AUDIT_LEN 7500 /* max length to print of cmdline/proctitle value during audit */ #define MAX_PROCTITLE_AUDIT_LEN 128 /* number of audit rules */ int audit_n_rules; /* determines whether we collect data for signals sent */ int audit_signals; struct audit_aux_data { struct audit_aux_data *next; int type; }; /* Number of target pids per aux struct. 
*/ #define AUDIT_AUX_PIDS 16 struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; kuid_t target_auid[AUDIT_AUX_PIDS]; kuid_t target_uid[AUDIT_AUX_PIDS]; unsigned int target_sessionid[AUDIT_AUX_PIDS]; u32 target_sid[AUDIT_AUX_PIDS]; char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN]; int pid_count; }; struct audit_aux_data_bprm_fcaps { struct audit_aux_data d; struct audit_cap_data fcap; unsigned int fcap_ver; struct audit_cap_data old_pcap; struct audit_cap_data new_pcap; }; struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; }; struct audit_nfcfgop_tab { enum audit_nfcfgop op; const char *s; }; static const struct audit_nfcfgop_tab audit_nfcfgs[] = { { AUDIT_XT_OP_REGISTER, "xt_register" }, { AUDIT_XT_OP_REPLACE, "xt_replace" }, { AUDIT_XT_OP_UNREGISTER, "xt_unregister" }, { AUDIT_NFT_OP_TABLE_REGISTER, "nft_register_table" }, { AUDIT_NFT_OP_TABLE_UNREGISTER, "nft_unregister_table" }, { AUDIT_NFT_OP_CHAIN_REGISTER, "nft_register_chain" }, { AUDIT_NFT_OP_CHAIN_UNREGISTER, "nft_unregister_chain" }, { AUDIT_NFT_OP_RULE_REGISTER, "nft_register_rule" }, { AUDIT_NFT_OP_RULE_UNREGISTER, "nft_unregister_rule" }, { AUDIT_NFT_OP_SET_REGISTER, "nft_register_set" }, { AUDIT_NFT_OP_SET_UNREGISTER, "nft_unregister_set" }, { AUDIT_NFT_OP_SETELEM_REGISTER, "nft_register_setelem" }, { AUDIT_NFT_OP_SETELEM_UNREGISTER, "nft_unregister_setelem" }, { AUDIT_NFT_OP_GEN_REGISTER, "nft_register_gen" }, { AUDIT_NFT_OP_OBJ_REGISTER, "nft_register_obj" }, { AUDIT_NFT_OP_OBJ_UNREGISTER, "nft_unregister_obj" }, { AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" }, { AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" }, { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" }, { AUDIT_NFT_OP_SETELEM_RESET, "nft_reset_setelem" }, { AUDIT_NFT_OP_RULE_RESET, "nft_reset_rule" }, { AUDIT_NFT_OP_INVALID, "nft_invalid" }, }; static int audit_match_perm(struct audit_context *ctx, int mask) { unsigned n; if (unlikely(!ctx)) return 0; n = ctx->major; switch (audit_classify_syscall(ctx->arch, n)) { case AUDITSC_NATIVE: if ((mask & AUDIT_PERM_WRITE) && audit_match_class(AUDIT_CLASS_WRITE, n)) return 1; if ((mask & AUDIT_PERM_READ) && audit_match_class(AUDIT_CLASS_READ, n)) return 1; if ((mask & AUDIT_PERM_ATTR) && audit_match_class(AUDIT_CLASS_CHATTR, n)) return 1; return 0; case AUDITSC_COMPAT: /* 32bit on biarch */ if ((mask & AUDIT_PERM_WRITE) && audit_match_class(AUDIT_CLASS_WRITE_32, n)) return 1; if ((mask & AUDIT_PERM_READ) && audit_match_class(AUDIT_CLASS_READ_32, n)) return 1; if ((mask & AUDIT_PERM_ATTR) && audit_match_class(AUDIT_CLASS_CHATTR_32, n)) return 1; return 0; case AUDITSC_OPEN: return mask & ACC_MODE(ctx->argv[1]); case AUDITSC_OPENAT: return mask & ACC_MODE(ctx->argv[2]); case AUDITSC_SOCKETCALL: return ((mask & AUDIT_PERM_WRITE) && ctx->argv[0] == SYS_BIND); case AUDITSC_EXECVE: return mask & AUDIT_PERM_EXEC; case AUDITSC_OPENAT2: return mask & ACC_MODE((u32)ctx->openat2.flags); default: return 0; } } static int audit_match_filetype(struct audit_context *ctx, int val) { struct audit_names *n; umode_t mode = (umode_t)val; if (unlikely(!ctx)) return 0; list_for_each_entry(n, &ctx->names_list, list) { if ((n->ino != AUDIT_INO_UNSET) && ((n->mode & S_IFMT) == mode)) return 1; } return 0; } /* * We keep a linked list of fixed-sized (31 pointer) arrays of audit_chunk *; * ->first_trees points to its beginning, ->trees - to the current end of data. * ->tree_count is the number of free entries in array pointed to by ->trees. 
* Original condition is (NULL, NULL, 0); as soon as it grows we never revert to NULL, * "empty" becomes (p, p, 31) afterwards. We don't shrink the list (and seriously, * it's going to remain 1-element for almost any setup) until we free context itself. * References in it _are_ dropped - at the same time we free/drop aux stuff. */ static void audit_set_auditable(struct audit_context *ctx) { if (!ctx->prio) { ctx->prio = 1; ctx->current_state = AUDIT_STATE_RECORD; } } static int put_tree_ref(struct audit_context *ctx, struct audit_chunk *chunk) { struct audit_tree_refs *p = ctx->trees; int left = ctx->tree_count; if (likely(left)) { p->c[--left] = chunk; ctx->tree_count = left; return 1; } if (!p) return 0; p = p->next; if (p) { p->c[30] = chunk; ctx->trees = p; ctx->tree_count = 30; return 1; } return 0; } static int grow_tree_refs(struct audit_context *ctx) { struct audit_tree_refs *p = ctx->trees; ctx->trees = kzalloc(sizeof(struct audit_tree_refs), GFP_KERNEL); if (!ctx->trees) { ctx->trees = p; return 0; } if (p) p->next = ctx->trees; else ctx->first_trees = ctx->trees; ctx->tree_count = 31; return 1; } static void unroll_tree_refs(struct audit_context *ctx, struct audit_tree_refs *p, int count) { struct audit_tree_refs *q; int n; if (!p) { /* we started with empty chain */ p = ctx->first_trees; count = 31; /* if the very first allocation has failed, nothing to do */ if (!p) return; } n = count; for (q = p; q != ctx->trees; q = q->next, n = 31) { while (n--) { audit_put_chunk(q->c[n]); q->c[n] = NULL; } } while (n-- > ctx->tree_count) { audit_put_chunk(q->c[n]); q->c[n] = NULL; } ctx->trees = p; ctx->tree_count = count; } static void free_tree_refs(struct audit_context *ctx) { struct audit_tree_refs *p, *q; for (p = ctx->first_trees; p; p = q) { q = p->next; kfree(p); } } static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree) { struct audit_tree_refs *p; int n; if (!tree) return 0; /* full ones */ for (p = ctx->first_trees; p != ctx->trees; p = p->next) { for (n = 0; n < 31; n++) if (audit_tree_match(p->c[n], tree)) return 1; } /* partial */ if (p) { for (n = ctx->tree_count; n < 31; n++) if (audit_tree_match(p->c[n], tree)) return 1; } return 0; } static int audit_compare_uid(kuid_t uid, struct audit_names *name, struct audit_field *f, struct audit_context *ctx) { struct audit_names *n; int rc; if (name) { rc = audit_uid_comparator(uid, f->op, name->uid); if (rc) return rc; } if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { rc = audit_uid_comparator(uid, f->op, n->uid); if (rc) return rc; } } return 0; } static int audit_compare_gid(kgid_t gid, struct audit_names *name, struct audit_field *f, struct audit_context *ctx) { struct audit_names *n; int rc; if (name) { rc = audit_gid_comparator(gid, f->op, name->gid); if (rc) return rc; } if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { rc = audit_gid_comparator(gid, f->op, n->gid); if (rc) return rc; } } return 0; } static int audit_field_compare(struct task_struct *tsk, const struct cred *cred, struct audit_field *f, struct audit_context *ctx, struct audit_names *name) { switch (f->val) { /* process to file object comparisons */ case AUDIT_COMPARE_UID_TO_OBJ_UID: return audit_compare_uid(cred->uid, name, f, ctx); case AUDIT_COMPARE_GID_TO_OBJ_GID: return audit_compare_gid(cred->gid, name, f, ctx); case AUDIT_COMPARE_EUID_TO_OBJ_UID: return audit_compare_uid(cred->euid, name, f, ctx); case AUDIT_COMPARE_EGID_TO_OBJ_GID: return audit_compare_gid(cred->egid, name, f, ctx); case 
AUDIT_COMPARE_AUID_TO_OBJ_UID: return audit_compare_uid(audit_get_loginuid(tsk), name, f, ctx); case AUDIT_COMPARE_SUID_TO_OBJ_UID: return audit_compare_uid(cred->suid, name, f, ctx); case AUDIT_COMPARE_SGID_TO_OBJ_GID: return audit_compare_gid(cred->sgid, name, f, ctx); case AUDIT_COMPARE_FSUID_TO_OBJ_UID: return audit_compare_uid(cred->fsuid, name, f, ctx); case AUDIT_COMPARE_FSGID_TO_OBJ_GID: return audit_compare_gid(cred->fsgid, name, f, ctx); /* uid comparisons */ case AUDIT_COMPARE_UID_TO_AUID: return audit_uid_comparator(cred->uid, f->op, audit_get_loginuid(tsk)); case AUDIT_COMPARE_UID_TO_EUID: return audit_uid_comparator(cred->uid, f->op, cred->euid); case AUDIT_COMPARE_UID_TO_SUID: return audit_uid_comparator(cred->uid, f->op, cred->suid); case AUDIT_COMPARE_UID_TO_FSUID: return audit_uid_comparator(cred->uid, f->op, cred->fsuid); /* auid comparisons */ case AUDIT_COMPARE_AUID_TO_EUID: return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->euid); case AUDIT_COMPARE_AUID_TO_SUID: return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->suid); case AUDIT_COMPARE_AUID_TO_FSUID: return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->fsuid); /* euid comparisons */ case AUDIT_COMPARE_EUID_TO_SUID: return audit_uid_comparator(cred->euid, f->op, cred->suid); case AUDIT_COMPARE_EUID_TO_FSUID: return audit_uid_comparator(cred->euid, f->op, cred->fsuid); /* suid comparisons */ case AUDIT_COMPARE_SUID_TO_FSUID: return audit_uid_comparator(cred->suid, f->op, cred->fsuid); /* gid comparisons */ case AUDIT_COMPARE_GID_TO_EGID: return audit_gid_comparator(cred->gid, f->op, cred->egid); case AUDIT_COMPARE_GID_TO_SGID: return audit_gid_comparator(cred->gid, f->op, cred->sgid); case AUDIT_COMPARE_GID_TO_FSGID: return audit_gid_comparator(cred->gid, f->op, cred->fsgid); /* egid comparisons */ case AUDIT_COMPARE_EGID_TO_SGID: return audit_gid_comparator(cred->egid, f->op, cred->sgid); case AUDIT_COMPARE_EGID_TO_FSGID: return audit_gid_comparator(cred->egid, f->op, cred->fsgid); /* sgid comparison */ case AUDIT_COMPARE_SGID_TO_FSGID: return audit_gid_comparator(cred->sgid, f->op, cred->fsgid); default: WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n"); return 0; } return 0; } /* Determine if any context name data matches a rule's watch data */ /* Compare a task_struct with an audit_rule. Return 1 on match, 0 * otherwise. * * If task_creation is true, this is an explicit indication that we are * filtering a task rule at task creation time. This and tsk == current are * the only situations where tsk->cred may be accessed without an rcu read lock. 
*/ static int audit_filter_rules(struct task_struct *tsk, struct audit_krule *rule, struct audit_context *ctx, struct audit_names *name, enum audit_state *state, bool task_creation) { const struct cred *cred; int i, need_sid = 1; u32 sid; unsigned int sessionid; if (ctx && rule->prio <= ctx->prio) return 0; cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; struct audit_names *n; int result = 0; pid_t pid; switch (f->type) { case AUDIT_PID: pid = task_tgid_nr(tsk); result = audit_comparator(pid, f->op, f->val); break; case AUDIT_PPID: if (ctx) { if (!ctx->ppid) ctx->ppid = task_ppid_nr(tsk); result = audit_comparator(ctx->ppid, f->op, f->val); } break; case AUDIT_EXE: result = audit_exe_compare(tsk, rule->exe); if (f->op == Audit_not_equal) result = !result; break; case AUDIT_UID: result = audit_uid_comparator(cred->uid, f->op, f->uid); break; case AUDIT_EUID: result = audit_uid_comparator(cred->euid, f->op, f->uid); break; case AUDIT_SUID: result = audit_uid_comparator(cred->suid, f->op, f->uid); break; case AUDIT_FSUID: result = audit_uid_comparator(cred->fsuid, f->op, f->uid); break; case AUDIT_GID: result = audit_gid_comparator(cred->gid, f->op, f->gid); if (f->op == Audit_equal) { if (!result) result = groups_search(cred->group_info, f->gid); } else if (f->op == Audit_not_equal) { if (result) result = !groups_search(cred->group_info, f->gid); } break; case AUDIT_EGID: result = audit_gid_comparator(cred->egid, f->op, f->gid); if (f->op == Audit_equal) { if (!result) result = groups_search(cred->group_info, f->gid); } else if (f->op == Audit_not_equal) { if (result) result = !groups_search(cred->group_info, f->gid); } break; case AUDIT_SGID: result = audit_gid_comparator(cred->sgid, f->op, f->gid); break; case AUDIT_FSGID: result = audit_gid_comparator(cred->fsgid, f->op, f->gid); break; case AUDIT_SESSIONID: sessionid = audit_get_sessionid(tsk); result = audit_comparator(sessionid, f->op, f->val); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); break; case AUDIT_ARCH: if (ctx) result = audit_comparator(ctx->arch, f->op, f->val); break; case AUDIT_EXIT: if (ctx && ctx->return_valid != AUDITSC_INVALID) result = audit_comparator(ctx->return_code, f->op, f->val); break; case AUDIT_SUCCESS: if (ctx && ctx->return_valid != AUDITSC_INVALID) { if (f->val) result = audit_comparator(ctx->return_valid, f->op, AUDITSC_SUCCESS); else result = audit_comparator(ctx->return_valid, f->op, AUDITSC_FAILURE); } break; case AUDIT_DEVMAJOR: if (name) { if (audit_comparator(MAJOR(name->dev), f->op, f->val) || audit_comparator(MAJOR(name->rdev), f->op, f->val)) ++result; } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_comparator(MAJOR(n->dev), f->op, f->val) || audit_comparator(MAJOR(n->rdev), f->op, f->val)) { ++result; break; } } } break; case AUDIT_DEVMINOR: if (name) { if (audit_comparator(MINOR(name->dev), f->op, f->val) || audit_comparator(MINOR(name->rdev), f->op, f->val)) ++result; } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_comparator(MINOR(n->dev), f->op, f->val) || audit_comparator(MINOR(n->rdev), f->op, f->val)) { ++result; break; } } } break; case AUDIT_INODE: if (name) result = audit_comparator(name->ino, f->op, f->val); else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_comparator(n->ino, f->op, f->val)) { ++result; break; } } } break; case AUDIT_OBJ_UID: if (name) { 
result = audit_uid_comparator(name->uid, f->op, f->uid); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_uid_comparator(n->uid, f->op, f->uid)) { ++result; break; } } } break; case AUDIT_OBJ_GID: if (name) { result = audit_gid_comparator(name->gid, f->op, f->gid); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_gid_comparator(n->gid, f->op, f->gid)) { ++result; break; } } } break; case AUDIT_WATCH: if (name) { result = audit_watch_compare(rule->watch, name->ino, name->dev); if (f->op == Audit_not_equal) result = !result; } break; case AUDIT_DIR: if (ctx) { result = match_tree_refs(ctx, rule->tree); if (f->op == Audit_not_equal) result = !result; } break; case AUDIT_LOGINUID: result = audit_uid_comparator(audit_get_loginuid(tsk), f->op, f->uid); break; case AUDIT_LOGINUID_SET: result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); break; case AUDIT_SADDR_FAM: if (ctx && ctx->sockaddr) result = audit_comparator(ctx->sockaddr->ss_family, f->op, f->val); break; case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: case AUDIT_SUBJ_TYPE: case AUDIT_SUBJ_SEN: case AUDIT_SUBJ_CLR: /* NOTE: this may return negative values indicating a temporary error. We simply treat this as a match for now to avoid losing information that may be wanted. An error message will also be logged upon error */ if (f->lsm_rule) { if (need_sid) { /* @tsk should always be equal to * @current with the exception of * fork()/copy_process() in which case * the new @tsk creds are still a dup * of @current's creds so we can still * use security_current_getsecid_subj() * here even though it always refs * @current's creds */ security_current_getsecid_subj(&sid); need_sid = 0; } result = security_audit_rule_match(sid, f->type, f->op, f->lsm_rule); } break; case AUDIT_OBJ_USER: case AUDIT_OBJ_ROLE: case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: /* The above note for AUDIT_SUBJ_USER...AUDIT_SUBJ_CLR also applies here */ if (f->lsm_rule) { /* Find files that match */ if (name) { result = security_audit_rule_match( name->osid, f->type, f->op, f->lsm_rule); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (security_audit_rule_match( n->osid, f->type, f->op, f->lsm_rule)) { ++result; break; } } } /* Find ipc objects that match */ if (!ctx || ctx->type != AUDIT_IPC) break; if (security_audit_rule_match(ctx->ipc.osid, f->type, f->op, f->lsm_rule)) ++result; } break; case AUDIT_ARG0: case AUDIT_ARG1: case AUDIT_ARG2: case AUDIT_ARG3: if (ctx) result = audit_comparator(ctx->argv[f->type-AUDIT_ARG0], f->op, f->val); break; case AUDIT_FILTERKEY: /* ignore this field for filtering */ result = 1; break; case AUDIT_PERM: result = audit_match_perm(ctx, f->val); if (f->op == Audit_not_equal) result = !result; break; case AUDIT_FILETYPE: result = audit_match_filetype(ctx, f->val); if (f->op == Audit_not_equal) result = !result; break; case AUDIT_FIELD_COMPARE: result = audit_field_compare(tsk, cred, f, ctx, name); break; } if (!result) return 0; } if (ctx) { if (rule->filterkey) { kfree(ctx->filterkey); ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); } ctx->prio = rule->prio; } switch (rule->action) { case AUDIT_NEVER: *state = AUDIT_STATE_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_STATE_RECORD; break; } return 1; } /* At process creation time, we can determine if system-call auditing is * completely disabled for this task. Since we only have the task * structure at this point, we can only check uid and gid. 
*/ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) { struct audit_entry *e; enum audit_state state; rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state, true)) { if (state == AUDIT_STATE_RECORD) *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); rcu_read_unlock(); return state; } } rcu_read_unlock(); return AUDIT_STATE_BUILD; } static int audit_in_mask(const struct audit_krule *rule, unsigned long val) { int word, bit; if (val > 0xffffffff) return false; word = AUDIT_WORD(val); if (word >= AUDIT_BITMASK_SIZE) return false; bit = AUDIT_BIT(val); return rule->mask[word] & bit; } /** * __audit_filter_op - common filter helper for operations (syscall/uring/etc) * @tsk: associated task * @ctx: audit context * @list: audit filter list * @name: audit_name (can be NULL) * @op: current syscall/uring_op * * Run the udit filters specified in @list against @tsk using @ctx, * @name, and @op, as necessary; the caller is responsible for ensuring * that the call is made while the RCU read lock is held. The @name * parameter can be NULL, but all others must be specified. * Returns 1/true if the filter finds a match, 0/false if none are found. */ static int __audit_filter_op(struct task_struct *tsk, struct audit_context *ctx, struct list_head *list, struct audit_names *name, unsigned long op) { struct audit_entry *e; enum audit_state state; list_for_each_entry_rcu(e, list, list) { if (audit_in_mask(&e->rule, op) && audit_filter_rules(tsk, &e->rule, ctx, name, &state, false)) { ctx->current_state = state; return 1; } } return 0; } /** * audit_filter_uring - apply filters to an io_uring operation * @tsk: associated task * @ctx: audit context */ static void audit_filter_uring(struct task_struct *tsk, struct audit_context *ctx) { if (auditd_test_task(tsk)) return; rcu_read_lock(); __audit_filter_op(tsk, ctx, &audit_filter_list[AUDIT_FILTER_URING_EXIT], NULL, ctx->uring_op); rcu_read_unlock(); } /* At syscall exit time, this filter is called if the audit_state is * not low enough that auditing cannot take place, but is also not * high enough that we already know we have to write an audit record * (i.e., the state is AUDIT_STATE_BUILD). */ static void audit_filter_syscall(struct task_struct *tsk, struct audit_context *ctx) { if (auditd_test_task(tsk)) return; rcu_read_lock(); __audit_filter_op(tsk, ctx, &audit_filter_list[AUDIT_FILTER_EXIT], NULL, ctx->major); rcu_read_unlock(); } /* * Given an audit_name check the inode hash table to see if they match. * Called holding the rcu read lock to protect the use of audit_inode_hash */ static int audit_filter_inode_name(struct task_struct *tsk, struct audit_names *n, struct audit_context *ctx) { int h = audit_hash_ino((u32)n->ino); struct list_head *list = &audit_inode_hash[h]; return __audit_filter_op(tsk, ctx, list, n, ctx->major); } /* At syscall exit time, this filter is called if any audit_names have been * collected during syscall processing. We only check rules in sublists at hash * buckets applicable to the inode numbers in audit_names. * Regarding audit_state, same rules apply as for audit_filter_syscall(). 
*/ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) { struct audit_names *n; if (auditd_test_task(tsk)) return; rcu_read_lock(); list_for_each_entry(n, &ctx->names_list, list) { if (audit_filter_inode_name(tsk, n, ctx)) break; } rcu_read_unlock(); } static inline void audit_proctitle_free(struct audit_context *context) { kfree(context->proctitle.value); context->proctitle.value = NULL; context->proctitle.len = 0; } static inline void audit_free_module(struct audit_context *context) { if (context->type == AUDIT_KERN_MODULE) { kfree(context->module.name); context->module.name = NULL; } } static inline void audit_free_names(struct audit_context *context) { struct audit_names *n, *next; list_for_each_entry_safe(n, next, &context->names_list, list) { list_del(&n->list); if (n->name) putname(n->name); if (n->should_free) kfree(n); } context->name_count = 0; path_put(&context->pwd); context->pwd.dentry = NULL; context->pwd.mnt = NULL; } static inline void audit_free_aux(struct audit_context *context) { struct audit_aux_data *aux; while ((aux = context->aux)) { context->aux = aux->next; kfree(aux); } context->aux = NULL; while ((aux = context->aux_pids)) { context->aux_pids = aux->next; kfree(aux); } context->aux_pids = NULL; } /** * audit_reset_context - reset a audit_context structure * @ctx: the audit_context to reset * * All fields in the audit_context will be reset to an initial state, all * references held by fields will be dropped, and private memory will be * released. When this function returns the audit_context will be suitable * for reuse, so long as the passed context is not NULL or a dummy context. */ static void audit_reset_context(struct audit_context *ctx) { if (!ctx) return; /* if ctx is non-null, reset the "ctx->context" regardless */ ctx->context = AUDIT_CTX_UNUSED; if (ctx->dummy) return; /* * NOTE: It shouldn't matter in what order we release the fields, so * release them in the order in which they appear in the struct; * this gives us some hope of quickly making sure we are * resetting the audit_context properly. * * Other things worth mentioning: * - we don't reset "dummy" * - we don't reset "state", we do reset "current_state" * - we preserve "filterkey" if "state" is AUDIT_STATE_RECORD * - much of this is likely overkill, but play it safe for now * - we really need to work on improving the audit_context struct */ ctx->current_state = ctx->state; ctx->serial = 0; ctx->major = 0; ctx->uring_op = 0; ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 }; memset(ctx->argv, 0, sizeof(ctx->argv)); ctx->return_code = 0; ctx->prio = (ctx->state == AUDIT_STATE_RECORD ? 
~0ULL : 0); ctx->return_valid = AUDITSC_INVALID; audit_free_names(ctx); if (ctx->state != AUDIT_STATE_RECORD) { kfree(ctx->filterkey); ctx->filterkey = NULL; } audit_free_aux(ctx); kfree(ctx->sockaddr); ctx->sockaddr = NULL; ctx->sockaddr_len = 0; ctx->ppid = 0; ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0); ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0); ctx->personality = 0; ctx->arch = 0; ctx->target_pid = 0; ctx->target_auid = ctx->target_uid = KUIDT_INIT(0); ctx->target_sessionid = 0; ctx->target_sid = 0; ctx->target_comm[0] = '\0'; unroll_tree_refs(ctx, NULL, 0); WARN_ON(!list_empty(&ctx->killed_trees)); audit_free_module(ctx); ctx->fds[0] = -1; ctx->type = 0; /* reset last for audit_free_*() */ } static inline struct audit_context *audit_alloc_context(enum audit_state state) { struct audit_context *context; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return NULL; context->context = AUDIT_CTX_UNUSED; context->state = state; context->prio = state == AUDIT_STATE_RECORD ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); INIT_LIST_HEAD(&context->names_list); context->fds[0] = -1; context->return_valid = AUDITSC_INVALID; return context; } /** * audit_alloc - allocate an audit context block for a task * @tsk: task * * Filter on the task information and allocate a per-task audit context * if necessary. Doing so turns on system call auditing for the * specified task. This is called from copy_process, so no lock is * needed. */ int audit_alloc(struct task_struct *tsk) { struct audit_context *context; enum audit_state state; char *key = NULL; if (likely(!audit_ever_enabled)) return 0; state = audit_filter_task(tsk, &key); if (state == AUDIT_STATE_DISABLED) { clear_task_syscall_work(tsk, SYSCALL_AUDIT); return 0; } context = audit_alloc_context(state); if (!context) { kfree(key); audit_log_lost("out of memory in audit_alloc"); return -ENOMEM; } context->filterkey = key; audit_set_context(tsk, context); set_task_syscall_work(tsk, SYSCALL_AUDIT); return 0; } static inline void audit_free_context(struct audit_context *context) { /* resetting is extra work, but it is likely just noise */ audit_reset_context(context); audit_proctitle_free(context); free_tree_refs(context); kfree(context->filterkey); kfree(context); } static int audit_log_pid_context(struct audit_context *context, pid_t pid, kuid_t auid, kuid_t uid, unsigned int sessionid, u32 sid, char *comm) { struct audit_buffer *ab; char *ctx = NULL; u32 len; int rc = 0; ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID); if (!ab) return rc; audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, from_kuid(&init_user_ns, auid), from_kuid(&init_user_ns, uid), sessionid); if (sid) { if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); rc = 1; } else { audit_log_format(ab, " obj=%s", ctx); security_release_secctx(ctx, len); } } audit_log_format(ab, " ocomm="); audit_log_untrustedstring(ab, comm); audit_log_end(ab); return rc; } static void audit_log_execve_info(struct audit_context *context, struct audit_buffer **ab) { long len_max; long len_rem; long len_full; long len_buf; long len_abuf = 0; long len_tmp; bool require_data; bool encode; unsigned int iter; unsigned int arg; char *buf_head; char *buf; const char __user *p = (const char __user *)current->mm->arg_start; /* NOTE: this buffer needs to be large enough to hold all the non-arg * data we put in the audit record for this argument (see the * code below) ... 
at this point in time 96 is plenty */ char abuf[96]; /* NOTE: we set MAX_EXECVE_AUDIT_LEN to a rather arbitrary limit, the * current value of 7500 is not as important as the fact that it * is less than 8k, a setting of 7500 gives us plenty of wiggle * room if we go over a little bit in the logging below */ WARN_ON_ONCE(MAX_EXECVE_AUDIT_LEN > 7500); len_max = MAX_EXECVE_AUDIT_LEN; /* scratch buffer to hold the userspace args */ buf_head = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL); if (!buf_head) { audit_panic("out of memory for argv string"); return; } buf = buf_head; audit_log_format(*ab, "argc=%d", context->execve.argc); len_rem = len_max; len_buf = 0; len_full = 0; require_data = true; encode = false; iter = 0; arg = 0; do { /* NOTE: we don't ever want to trust this value for anything * serious, but the audit record format insists we * provide an argument length for really long arguments, * e.g. > MAX_EXECVE_AUDIT_LEN, so we have no choice but * to use strncpy_from_user() to obtain this value for * recording in the log, although we don't use it * anywhere here to avoid a double-fetch problem */ if (len_full == 0) len_full = strnlen_user(p, MAX_ARG_STRLEN) - 1; /* read more data from userspace */ if (require_data) { /* can we make more room in the buffer? */ if (buf != buf_head) { memmove(buf_head, buf, len_buf); buf = buf_head; } /* fetch as much as we can of the argument */ len_tmp = strncpy_from_user(&buf_head[len_buf], p, len_max - len_buf); if (len_tmp == -EFAULT) { /* unable to copy from userspace */ send_sig(SIGKILL, current, 0); goto out; } else if (len_tmp == (len_max - len_buf)) { /* buffer is not large enough */ require_data = true; /* NOTE: if we are going to span multiple * buffers force the encoding so we stand * a chance at a sane len_full value and * consistent record encoding */ encode = true; len_full = len_full * 2; p += len_tmp; } else { require_data = false; if (!encode) encode = audit_string_contains_control( buf, len_tmp); /* try to use a trusted value for len_full */ if (len_full < len_max) len_full = (encode ? len_tmp * 2 : len_tmp); p += len_tmp + 1; } len_buf += len_tmp; buf_head[len_buf] = '\0'; /* length of the buffer in the audit record? */ len_abuf = (encode ? 
len_buf * 2 : len_buf + 2); } /* write as much as we can to the audit log */ if (len_buf >= 0) { /* NOTE: some magic numbers here - basically if we * can't fit a reasonable amount of data into the * existing audit buffer, flush it and start with * a new buffer */ if ((sizeof(abuf) + 8) > len_rem) { len_rem = len_max; audit_log_end(*ab); *ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE); if (!*ab) goto out; } /* create the non-arg portion of the arg record */ len_tmp = 0; if (require_data || (iter > 0) || ((len_abuf + sizeof(abuf)) > len_rem)) { if (iter == 0) { len_tmp += snprintf(&abuf[len_tmp], sizeof(abuf) - len_tmp, " a%d_len=%lu", arg, len_full); } len_tmp += snprintf(&abuf[len_tmp], sizeof(abuf) - len_tmp, " a%d[%d]=", arg, iter++); } else len_tmp += snprintf(&abuf[len_tmp], sizeof(abuf) - len_tmp, " a%d=", arg); WARN_ON(len_tmp >= sizeof(abuf)); abuf[sizeof(abuf) - 1] = '\0'; /* log the arg in the audit record */ audit_log_format(*ab, "%s", abuf); len_rem -= len_tmp; len_tmp = len_buf; if (encode) { if (len_abuf > len_rem) len_tmp = len_rem / 2; /* encoding */ audit_log_n_hex(*ab, buf, len_tmp); len_rem -= len_tmp * 2; len_abuf -= len_tmp * 2; } else { if (len_abuf > len_rem) len_tmp = len_rem - 2; /* quotes */ audit_log_n_string(*ab, buf, len_tmp); len_rem -= len_tmp + 2; /* don't subtract the "2" because we still need * to add quotes to the remaining string */ len_abuf -= len_tmp; } len_buf -= len_tmp; buf += len_tmp; } /* ready to move to the next argument? */ if ((len_buf == 0) && !require_data) { arg++; iter = 0; len_full = 0; require_data = true; encode = false; } } while (arg < context->execve.argc); /* NOTE: the caller handles the final audit_log_end() call */ out: kfree(buf_head); } static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) { if (cap_isclear(*cap)) { audit_log_format(ab, " %s=0", prefix); return; } audit_log_format(ab, " %s=%016llx", prefix, cap->val); } static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) { if (name->fcap_ver == -1) { audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? 
cap_fi=?"); return; } audit_log_cap(ab, "cap_fp", &name->fcap.permitted); audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", name->fcap.fE, name->fcap_ver, from_kuid(&init_user_ns, name->fcap.rootid)); } static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) { const struct audit_ntp_data *ntp = &context->time.ntp_data; const struct timespec64 *tk = &context->time.tk_injoffset; static const char * const ntp_name[] = { "offset", "freq", "status", "tai", "tick", "adjust", }; int type; if (context->type == AUDIT_TIME_ADJNTPVAL) { for (type = 0; type < AUDIT_NTP_NVALS; type++) { if (ntp->vals[type].newval != ntp->vals[type].oldval) { if (!*ab) { *ab = audit_log_start(context, GFP_KERNEL, AUDIT_TIME_ADJNTPVAL); if (!*ab) return; } audit_log_format(*ab, "op=%s old=%lli new=%lli", ntp_name[type], ntp->vals[type].oldval, ntp->vals[type].newval); audit_log_end(*ab); *ab = NULL; } } } if (tk->tv_sec != 0 || tk->tv_nsec != 0) { if (!*ab) { *ab = audit_log_start(context, GFP_KERNEL, AUDIT_TIME_INJOFFSET); if (!*ab) return; } audit_log_format(*ab, "sec=%lli nsec=%li", (long long)tk->tv_sec, tk->tv_nsec); audit_log_end(*ab); *ab = NULL; } } static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; int i; ab = audit_log_start(context, GFP_KERNEL, context->type); if (!ab) return; switch (context->type) { case AUDIT_SOCKETCALL: { int nargs = context->socketcall.nargs; audit_log_format(ab, "nargs=%d", nargs); for (i = 0; i < nargs; i++) audit_log_format(ab, " a%d=%lx", i, context->socketcall.args[i]); break; } case AUDIT_IPC: { u32 osid = context->ipc.osid; audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", from_kuid(&init_user_ns, context->ipc.uid), from_kgid(&init_user_ns, context->ipc.gid), context->ipc.mode); if (osid) { char *ctx = NULL; u32 len; if (security_secid_to_secctx(osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", osid); *call_panic = 1; } else { audit_log_format(ab, " obj=%s", ctx); security_release_secctx(ctx, len); } } if (context->ipc.has_perm) { audit_log_end(ab); ab = audit_log_start(context, GFP_KERNEL, AUDIT_IPC_SET_PERM); if (unlikely(!ab)) return; audit_log_format(ab, "qbytes=%lx ouid=%u ogid=%u mode=%#ho", context->ipc.qbytes, context->ipc.perm_uid, context->ipc.perm_gid, context->ipc.perm_mode); } break; } case AUDIT_MQ_OPEN: audit_log_format(ab, "oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld " "mq_msgsize=%ld mq_curmsgs=%ld", context->mq_open.oflag, context->mq_open.mode, context->mq_open.attr.mq_flags, context->mq_open.attr.mq_maxmsg, context->mq_open.attr.mq_msgsize, context->mq_open.attr.mq_curmsgs); break; case AUDIT_MQ_SENDRECV: audit_log_format(ab, "mqdes=%d msg_len=%zd msg_prio=%u " "abs_timeout_sec=%lld abs_timeout_nsec=%ld", context->mq_sendrecv.mqdes, context->mq_sendrecv.msg_len, context->mq_sendrecv.msg_prio, (long long) context->mq_sendrecv.abs_timeout.tv_sec, context->mq_sendrecv.abs_timeout.tv_nsec); break; case AUDIT_MQ_NOTIFY: audit_log_format(ab, "mqdes=%d sigev_signo=%d", context->mq_notify.mqdes, context->mq_notify.sigev_signo); break; case AUDIT_MQ_GETSETATTR: { struct mq_attr *attr = &context->mq_getsetattr.mqstat; audit_log_format(ab, "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld " "mq_curmsgs=%ld ", context->mq_getsetattr.mqdes, attr->mq_flags, attr->mq_maxmsg, attr->mq_msgsize, attr->mq_curmsgs); break; } case AUDIT_CAPSET: audit_log_format(ab, "pid=%d", context->capset.pid); audit_log_cap(ab, "cap_pi", 
&context->capset.cap.inheritable); audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); audit_log_cap(ab, "cap_pa", &context->capset.cap.ambient); break; case AUDIT_MMAP: audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, context->mmap.flags); break; case AUDIT_OPENAT2: audit_log_format(ab, "oflag=0%llo mode=0%llo resolve=0x%llx", context->openat2.flags, context->openat2.mode, context->openat2.resolve); break; case AUDIT_EXECVE: audit_log_execve_info(context, &ab); break; case AUDIT_KERN_MODULE: audit_log_format(ab, "name="); if (context->module.name) { audit_log_untrustedstring(ab, context->module.name); } else audit_log_format(ab, "(null)"); break; case AUDIT_TIME_ADJNTPVAL: case AUDIT_TIME_INJOFFSET: /* this call deviates from the rest, eating the buffer */ audit_log_time(context, &ab); break; } audit_log_end(ab); } static inline int audit_proctitle_rtrim(char *proctitle, int len) { char *end = proctitle + len - 1; while (end > proctitle && !isprint(*end)) end--; /* catch the case where proctitle is only 1 non-print character */ len = end - proctitle + 1; len -= isprint(proctitle[len-1]) == 0; return len; } /* * audit_log_name - produce AUDIT_PATH record from struct audit_names * @context: audit_context for the task * @n: audit_names structure with reportable details * @path: optional path to report instead of audit_names->name * @record_num: record number to report when handling a list of names * @call_panic: optional pointer to int that will be updated if secid fails */ static void audit_log_name(struct audit_context *context, struct audit_names *n, const struct path *path, int record_num, int *call_panic) { struct audit_buffer *ab; ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) return; audit_log_format(ab, "item=%d", record_num); if (path) audit_log_d_path(ab, " name=", path); else if (n->name) { switch (n->name_len) { case AUDIT_NAME_FULL: /* log the full path */ audit_log_format(ab, " name="); audit_log_untrustedstring(ab, n->name->name); break; case 0: /* name was specified as a relative path and the * directory component is the cwd */ if (context->pwd.dentry && context->pwd.mnt) audit_log_d_path(ab, " name=", &context->pwd); else audit_log_format(ab, " name=(null)"); break; default: /* log the name's directory component */ audit_log_format(ab, " name="); audit_log_n_untrustedstring(ab, n->name->name, n->name_len); } } else audit_log_format(ab, " name=(null)"); if (n->ino != AUDIT_INO_UNSET) audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x", n->ino, MAJOR(n->dev), MINOR(n->dev), n->mode, from_kuid(&init_user_ns, n->uid), from_kgid(&init_user_ns, n->gid), MAJOR(n->rdev), MINOR(n->rdev)); if (n->osid != 0) { char *ctx = NULL; u32 len; if (security_secid_to_secctx( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); if (call_panic) *call_panic = 2; } else { audit_log_format(ab, " obj=%s", ctx); security_release_secctx(ctx, len); } } /* log the audit_names record type */ switch (n->type) { case AUDIT_TYPE_NORMAL: audit_log_format(ab, " nametype=NORMAL"); break; case AUDIT_TYPE_PARENT: audit_log_format(ab, " nametype=PARENT"); break; case AUDIT_TYPE_CHILD_DELETE: audit_log_format(ab, " nametype=DELETE"); break; case AUDIT_TYPE_CHILD_CREATE: audit_log_format(ab, " nametype=CREATE"); break; default: audit_log_format(ab, " nametype=UNKNOWN"); break; } audit_log_fcaps(ab, n); audit_log_end(ab); } static void audit_log_proctitle(void) { int res; 
char *buf; char *msg = "(null)"; int len = strlen(msg); struct audit_context *context = audit_context(); struct audit_buffer *ab; ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE); if (!ab) return; /* audit_panic or being filtered */ audit_log_format(ab, "proctitle="); /* Not cached */ if (!context->proctitle.value) { buf = kmalloc(MAX_PROCTITLE_AUDIT_LEN, GFP_KERNEL); if (!buf) goto out; /* Historically called this from procfs naming */ res = get_cmdline(current, buf, MAX_PROCTITLE_AUDIT_LEN); if (res == 0) { kfree(buf); goto out; } res = audit_proctitle_rtrim(buf, res); if (res == 0) { kfree(buf); goto out; } context->proctitle.value = buf; context->proctitle.len = res; } msg = context->proctitle.value; len = context->proctitle.len; out: audit_log_n_untrustedstring(ab, msg, len); audit_log_end(ab); } /** * audit_log_uring - generate a AUDIT_URINGOP record * @ctx: the audit context */ static void audit_log_uring(struct audit_context *ctx) { struct audit_buffer *ab; const struct cred *cred; ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP); if (!ab) return; cred = current_cred(); audit_log_format(ab, "uring_op=%d", ctx->uring_op); if (ctx->return_valid != AUDITSC_INVALID) audit_log_format(ab, " success=%s exit=%ld", (ctx->return_valid == AUDITSC_SUCCESS ? "yes" : "no"), ctx->return_code); audit_log_format(ab, " items=%d" " ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u" " fsuid=%u egid=%u sgid=%u fsgid=%u", ctx->name_count, task_ppid_nr(current), task_tgid_nr(current), from_kuid(&init_user_ns, cred->uid), from_kgid(&init_user_ns, cred->gid), from_kuid(&init_user_ns, cred->euid), from_kuid(&init_user_ns, cred->suid), from_kuid(&init_user_ns, cred->fsuid), from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid)); audit_log_task_context(ab); audit_log_key(ab, ctx->filterkey); audit_log_end(ab); } static void audit_log_exit(void) { int i, call_panic = 0; struct audit_context *context = audit_context(); struct audit_buffer *ab; struct audit_aux_data *aux; struct audit_names *n; context->personality = current->personality; switch (context->context) { case AUDIT_CTX_SYSCALL: ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); if (!ab) return; audit_log_format(ab, "arch=%x syscall=%d", context->arch, context->major); if (context->personality != PER_LINUX) audit_log_format(ab, " per=%lx", context->personality); if (context->return_valid != AUDITSC_INVALID) audit_log_format(ab, " success=%s exit=%ld", (context->return_valid == AUDITSC_SUCCESS ? 
"yes" : "no"), context->return_code); audit_log_format(ab, " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", context->argv[0], context->argv[1], context->argv[2], context->argv[3], context->name_count); audit_log_task_info(ab); audit_log_key(ab, context->filterkey); audit_log_end(ab); break; case AUDIT_CTX_URING: audit_log_uring(context); break; default: BUG(); break; } for (aux = context->aux; aux; aux = aux->next) { ab = audit_log_start(context, GFP_KERNEL, aux->type); if (!ab) continue; /* audit_panic has been called */ switch (aux->type) { case AUDIT_BPRM_FCAPS: { struct audit_aux_data_bprm_fcaps *axs = (void *)aux; audit_log_format(ab, "fver=%x", axs->fcap_ver); audit_log_cap(ab, "fp", &axs->fcap.permitted); audit_log_cap(ab, "fi", &axs->fcap.inheritable); audit_log_format(ab, " fe=%d", axs->fcap.fE); audit_log_cap(ab, "old_pp", &axs->old_pcap.permitted); audit_log_cap(ab, "old_pi", &axs->old_pcap.inheritable); audit_log_cap(ab, "old_pe", &axs->old_pcap.effective); audit_log_cap(ab, "old_pa", &axs->old_pcap.ambient); audit_log_cap(ab, "pp", &axs->new_pcap.permitted); audit_log_cap(ab, "pi", &axs->new_pcap.inheritable); audit_log_cap(ab, "pe", &axs->new_pcap.effective); audit_log_cap(ab, "pa", &axs->new_pcap.ambient); audit_log_format(ab, " frootid=%d", from_kuid(&init_user_ns, axs->fcap.rootid)); break; } } audit_log_end(ab); } if (context->type) show_special(context, &call_panic); if (context->fds[0] >= 0) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_FD_PAIR); if (ab) { audit_log_format(ab, "fd0=%d fd1=%d", context->fds[0], context->fds[1]); audit_log_end(ab); } } if (context->sockaddr_len) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR); if (ab) { audit_log_format(ab, "saddr="); audit_log_n_hex(ab, (void *)context->sockaddr, context->sockaddr_len); audit_log_end(ab); } } for (aux = context->aux_pids; aux; aux = aux->next) { struct audit_aux_data_pids *axs = (void *)aux; for (i = 0; i < axs->pid_count; i++) if (audit_log_pid_context(context, axs->target_pid[i], axs->target_auid[i], axs->target_uid[i], axs->target_sessionid[i], axs->target_sid[i], axs->target_comm[i])) call_panic = 1; } if (context->target_pid && audit_log_pid_context(context, context->target_pid, context->target_auid, context->target_uid, context->target_sessionid, context->target_sid, context->target_comm)) call_panic = 1; if (context->pwd.dentry && context->pwd.mnt) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { audit_log_d_path(ab, "cwd=", &context->pwd); audit_log_end(ab); } } i = 0; list_for_each_entry(n, &context->names_list, list) { if (n->hidden) continue; audit_log_name(context, n, NULL, i++, &call_panic); } if (context->context == AUDIT_CTX_SYSCALL) audit_log_proctitle(); /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); if (ab) audit_log_end(ab); if (call_panic) audit_panic("error in audit_log_exit()"); } /** * __audit_free - free a per-task audit context * @tsk: task whose audit context block to free * * Called from copy_process, do_exit, and the io_uring code */ void __audit_free(struct task_struct *tsk) { struct audit_context *context = tsk->audit_context; if (!context) return; /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); /* We are called either by do_exit() or the fork() error handling code; * in the former case tsk == current and in the latter tsk is a * random task_struct that doesn't have any meaningful data we * need to log via 
audit_log_exit(). */ if (tsk == current && !context->dummy) { context->return_valid = AUDITSC_INVALID; context->return_code = 0; if (context->context == AUDIT_CTX_SYSCALL) { audit_filter_syscall(tsk, context); audit_filter_inodes(tsk, context); if (context->current_state == AUDIT_STATE_RECORD) audit_log_exit(); } else if (context->context == AUDIT_CTX_URING) { /* TODO: verify this case is real and valid */ audit_filter_uring(tsk, context); audit_filter_inodes(tsk, context); if (context->current_state == AUDIT_STATE_RECORD) audit_log_uring(context); } } audit_set_context(tsk, NULL); audit_free_context(context); } /** * audit_return_fixup - fixup the return codes in the audit_context * @ctx: the audit_context * @success: true/false value to indicate if the operation succeeded or not * @code: operation return code * * We need to fixup the return code in the audit logs if the actual return * codes are later going to be fixed by the arch specific signal handlers. */ static void audit_return_fixup(struct audit_context *ctx, int success, long code) { /* * This is actually a test for: * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) * * but is faster than a bunch of || */ if (unlikely(code <= -ERESTARTSYS) && (code >= -ERESTART_RESTARTBLOCK) && (code != -ENOIOCTLCMD)) ctx->return_code = -EINTR; else ctx->return_code = code; ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE); } /** * __audit_uring_entry - prepare the kernel task's audit context for io_uring * @op: the io_uring opcode * * This is similar to audit_syscall_entry() but is intended for use by io_uring * operations. This function should only ever be called from * audit_uring_entry() as we rely on the audit context checking present in that * function. */ void __audit_uring_entry(u8 op) { struct audit_context *ctx = audit_context(); if (ctx->state == AUDIT_STATE_DISABLED) return; /* * NOTE: It's possible that we can be called from the process' context * before it returns to userspace, and before audit_syscall_exit() * is called. In this case there is not much to do, just record * the io_uring details and return. */ ctx->uring_op = op; if (ctx->context == AUDIT_CTX_SYSCALL) return; ctx->dummy = !audit_n_rules; if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD) ctx->prio = 0; ctx->context = AUDIT_CTX_URING; ctx->current_state = ctx->state; ktime_get_coarse_real_ts64(&ctx->ctime); } /** * __audit_uring_exit - wrap up the kernel task's audit context after io_uring * @success: true/false value to indicate if the operation succeeded or not * @code: operation return code * * This is similar to audit_syscall_exit() but is intended for use by io_uring * operations. This function should only ever be called from * audit_uring_exit() as we rely on the audit context checking present in that * function. */ void __audit_uring_exit(int success, long code) { struct audit_context *ctx = audit_context(); if (ctx->dummy) { if (ctx->context != AUDIT_CTX_URING) return; goto out; } audit_return_fixup(ctx, success, code); if (ctx->context == AUDIT_CTX_SYSCALL) { /* * NOTE: See the note in __audit_uring_entry() about the case * where we may be called from process context before we * return to userspace via audit_syscall_exit(). In this * case we simply emit a URINGOP record and bail, the * normal syscall exit handling will take care of * everything else. 
* It is also worth mentioning that when we are called, * the current process creds may differ from the creds * used during the normal syscall processing; keep that * in mind if/when we move the record generation code. */ /* * We need to filter on the syscall info here to decide if we * should emit a URINGOP record. I know it seems odd but this * solves the problem where users have a filter to block *all* * syscall records in the "exit" filter; we want to preserve * the behavior here. */ audit_filter_syscall(current, ctx); if (ctx->current_state != AUDIT_STATE_RECORD) audit_filter_uring(current, ctx); audit_filter_inodes(current, ctx); if (ctx->current_state != AUDIT_STATE_RECORD) return; audit_log_uring(ctx); return; } /* this may generate CONFIG_CHANGE records */ if (!list_empty(&ctx->killed_trees)) audit_kill_trees(ctx); /* run through both filters to ensure we set the filterkey properly */ audit_filter_uring(current, ctx); audit_filter_inodes(current, ctx); if (ctx->current_state != AUDIT_STATE_RECORD) goto out; audit_log_exit(); out: audit_reset_context(ctx); } /** * __audit_syscall_entry - fill in an audit record at syscall entry * @major: major syscall type (function) * @a1: additional syscall register 1 * @a2: additional syscall register 2 * @a3: additional syscall register 3 * @a4: additional syscall register 4 * * Fill in audit context at syscall entry. This only happens if the * audit context was created when the task was created and the state or * filters demand the audit context be built. If the state from the * per-task filter or from the per-syscall filter is AUDIT_STATE_RECORD, * then the record will be written at syscall exit time (otherwise, it * will only be written if another part of the kernel requests that it * be written). */ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { struct audit_context *context = audit_context(); enum audit_state state; if (!audit_enabled || !context) return; WARN_ON(context->context != AUDIT_CTX_UNUSED); WARN_ON(context->name_count); if (context->context != AUDIT_CTX_UNUSED || context->name_count) { audit_panic("unrecoverable error in audit_syscall_entry()"); return; } state = context->state; if (state == AUDIT_STATE_DISABLED) return; context->dummy = !audit_n_rules; if (!context->dummy && state == AUDIT_STATE_BUILD) { context->prio = 0; if (auditd_test_task(current)) return; } context->arch = syscall_get_arch(current); context->major = major; context->argv[0] = a1; context->argv[1] = a2; context->argv[2] = a3; context->argv[3] = a4; context->context = AUDIT_CTX_SYSCALL; context->current_state = state; ktime_get_coarse_real_ts64(&context->ctime); } /** * __audit_syscall_exit - deallocate audit context after a system call * @success: success value of the syscall * @return_code: return value of the syscall * * Tear down after system call. If the audit context has been marked as * auditable (either because of the AUDIT_STATE_RECORD state from * filtering, or because some other part of the kernel wrote an audit * message), then write out the syscall information. In call cases, * free the names stored from getname(). 
*/ void __audit_syscall_exit(int success, long return_code) { struct audit_context *context = audit_context(); if (!context || context->dummy || context->context != AUDIT_CTX_SYSCALL) goto out; /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); audit_return_fixup(context, success, return_code); /* run through both filters to ensure we set the filterkey properly */ audit_filter_syscall(current, context); audit_filter_inodes(current, context); if (context->current_state != AUDIT_STATE_RECORD) goto out; audit_log_exit(); out: audit_reset_context(context); } static inline void handle_one(const struct inode *inode) { struct audit_context *context; struct audit_tree_refs *p; struct audit_chunk *chunk; int count; if (likely(!inode->i_fsnotify_marks)) return; context = audit_context(); p = context->trees; count = context->tree_count; rcu_read_lock(); chunk = audit_tree_lookup(inode); rcu_read_unlock(); if (!chunk) return; if (likely(put_tree_ref(context, chunk))) return; if (unlikely(!grow_tree_refs(context))) { pr_warn("out of memory, audit has lost a tree reference\n"); audit_set_auditable(context); audit_put_chunk(chunk); unroll_tree_refs(context, p, count); return; } put_tree_ref(context, chunk); } static void handle_path(const struct dentry *dentry) { struct audit_context *context; struct audit_tree_refs *p; const struct dentry *d, *parent; struct audit_chunk *drop; unsigned long seq; int count; context = audit_context(); p = context->trees; count = context->tree_count; retry: drop = NULL; d = dentry; rcu_read_lock(); seq = read_seqbegin(&rename_lock); for (;;) { struct inode *inode = d_backing_inode(d); if (inode && unlikely(inode->i_fsnotify_marks)) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { if (unlikely(!put_tree_ref(context, chunk))) { drop = chunk; break; } } } parent = d->d_parent; if (parent == d) break; d = parent; } if (unlikely(read_seqretry(&rename_lock, seq) || drop)) { /* in this order */ rcu_read_unlock(); if (!drop) { /* just a race with rename */ unroll_tree_refs(context, p, count); goto retry; } audit_put_chunk(drop); if (grow_tree_refs(context)) { /* OK, got more space */ unroll_tree_refs(context, p, count); goto retry; } /* too bad */ pr_warn("out of memory, audit has lost a tree reference\n"); unroll_tree_refs(context, p, count); audit_set_auditable(context); return; } rcu_read_unlock(); } static struct audit_names *audit_alloc_name(struct audit_context *context, unsigned char type) { struct audit_names *aname; if (context->name_count < AUDIT_NAMES) { aname = &context->preallocated_names[context->name_count]; memset(aname, 0, sizeof(*aname)); } else { aname = kzalloc(sizeof(*aname), GFP_NOFS); if (!aname) return NULL; aname->should_free = true; } aname->ino = AUDIT_INO_UNSET; aname->type = type; list_add_tail(&aname->list, &context->names_list); context->name_count++; if (!context->pwd.dentry) get_fs_pwd(current->fs, &context->pwd); return aname; } /** * __audit_reusename - fill out filename with info from existing entry * @uptr: userland ptr to pathname * * Search the audit_names list for the current audit context. If there is an * existing entry with a matching "uptr" then return the filename * associated with that audit_name. If not, return NULL. 
*/ struct filename * __audit_reusename(const __user char *uptr) { struct audit_context *context = audit_context(); struct audit_names *n; list_for_each_entry(n, &context->names_list, list) { if (!n->name) continue; if (n->name->uptr == uptr) { atomic_inc(&n->name->refcnt); return n->name; } } return NULL; } /** * __audit_getname - add a name to the list * @name: name to add * * Add a name to the list of audit names for this context. * Called from fs/namei.c:getname(). */ void __audit_getname(struct filename *name) { struct audit_context *context = audit_context(); struct audit_names *n; if (context->context == AUDIT_CTX_UNUSED) return; n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); if (!n) return; n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; atomic_inc(&name->refcnt); } static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry) { struct cpu_vfs_cap_data caps; int rc; if (!dentry) return 0; rc = get_vfs_caps_from_disk(&nop_mnt_idmap, dentry, &caps); if (rc) return rc; name->fcap.permitted = caps.permitted; name->fcap.inheritable = caps.inheritable; name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); name->fcap.rootid = caps.rootid; name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; return 0; } /* Copy inode data into an audit_names. */ static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, struct inode *inode, unsigned int flags) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; name->mode = inode->i_mode; name->uid = inode->i_uid; name->gid = inode->i_gid; name->rdev = inode->i_rdev; security_inode_getsecid(inode, &name->osid); if (flags & AUDIT_INODE_NOEVAL) { name->fcap_ver = -1; return; } audit_copy_fcaps(name, dentry); } /** * __audit_inode - store the inode and device from a lookup * @name: name being audited * @dentry: dentry being audited * @flags: attributes for this particular entry */ void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags) { struct audit_context *context = audit_context(); struct inode *inode = d_backing_inode(dentry); struct audit_names *n; bool parent = flags & AUDIT_INODE_PARENT; struct audit_entry *e; struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); list_for_each_entry_rcu(e, list, list) { for (i = 0; i < e->rule.field_count; i++) { struct audit_field *f = &e->rule.fields[i]; if (f->type == AUDIT_FSTYPE && audit_comparator(inode->i_sb->s_magic, f->op, f->val) && e->rule.action == AUDIT_NEVER) { rcu_read_unlock(); return; } } } rcu_read_unlock(); if (!name) goto out_alloc; /* * If we have a pointer to an audit_names entry already, then we can * just use it directly if the type is correct. */ n = name->aname; if (n) { if (parent) { if (n->type == AUDIT_TYPE_PARENT || n->type == AUDIT_TYPE_UNKNOWN) goto out; } else { if (n->type != AUDIT_TYPE_PARENT) goto out; } } list_for_each_entry_reverse(n, &context->names_list, list) { if (n->ino) { /* valid inode number, use that for the comparison */ if (n->ino != inode->i_ino || n->dev != inode->i_sb->s_dev) continue; } else if (n->name) { /* inode number has not been set, check the name */ if (strcmp(n->name->name, name->name)) continue; } else /* no inode and no name (?!) ... this is odd ... 
*/ continue; /* match the correct record type */ if (parent) { if (n->type == AUDIT_TYPE_PARENT || n->type == AUDIT_TYPE_UNKNOWN) goto out; } else { if (n->type != AUDIT_TYPE_PARENT) goto out; } } out_alloc: /* unable to find an entry with both a matching name and type */ n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); if (!n) return; if (name) { n->name = name; atomic_inc(&name->refcnt); } out: if (parent) { n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; n->type = AUDIT_TYPE_PARENT; if (flags & AUDIT_INODE_HIDDEN) n->hidden = true; } else { n->name_len = AUDIT_NAME_FULL; n->type = AUDIT_TYPE_NORMAL; } handle_path(dentry); audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL); } void __audit_file(const struct file *file) { __audit_inode(NULL, file->f_path.dentry, 0); } /** * __audit_inode_child - collect inode info for created/removed objects * @parent: inode of dentry parent * @dentry: dentry being audited * @type: AUDIT_TYPE_* value that we're looking for * * For syscalls that create or remove filesystem objects, audit_inode * can only collect information for the filesystem object's parent. * This call updates the audit context with the child's information. * Syscalls that create a new filesystem object must be hooked after * the object is created. Syscalls that remove a filesystem object * must be hooked prior, in order to capture the target inode during * unsuccessful attempts. */ void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { struct audit_context *context = audit_context(); struct inode *inode = d_backing_inode(dentry); const struct qstr *dname = &dentry->d_name; struct audit_names *n, *found_parent = NULL, *found_child = NULL; struct audit_entry *e; struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); list_for_each_entry_rcu(e, list, list) { for (i = 0; i < e->rule.field_count; i++) { struct audit_field *f = &e->rule.fields[i]; if (f->type == AUDIT_FSTYPE && audit_comparator(parent->i_sb->s_magic, f->op, f->val) && e->rule.action == AUDIT_NEVER) { rcu_read_unlock(); return; } } } rcu_read_unlock(); if (inode) handle_one(inode); /* look for a parent entry first */ list_for_each_entry(n, &context->names_list, list) { if (!n->name || (n->type != AUDIT_TYPE_PARENT && n->type != AUDIT_TYPE_UNKNOWN)) continue; if (n->ino == parent->i_ino && n->dev == parent->i_sb->s_dev && !audit_compare_dname_path(dname, n->name->name, n->name_len)) { if (n->type == AUDIT_TYPE_UNKNOWN) n->type = AUDIT_TYPE_PARENT; found_parent = n; break; } } cond_resched(); /* is there a matching child entry? */ list_for_each_entry(n, &context->names_list, list) { /* can only match entries that have a name */ if (!n->name || (n->type != type && n->type != AUDIT_TYPE_UNKNOWN)) continue; if (!strcmp(dname->name, n->name->name) || !audit_compare_dname_path(dname, n->name->name, found_parent ? found_parent->name_len : AUDIT_NAME_FULL)) { if (n->type == AUDIT_TYPE_UNKNOWN) n->type = type; found_child = n; break; } } if (!found_parent) { /* create a new, "anonymous" parent record */ n = audit_alloc_name(context, AUDIT_TYPE_PARENT); if (!n) return; audit_copy_inode(n, NULL, parent, 0); } if (!found_child) { found_child = audit_alloc_name(context, type); if (!found_child) return; /* Re-use the name belonging to the slot for a matching parent * directory. 
All names for this context are relinquished in * audit_free_names() */ if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; atomic_inc(&found_child->name->refcnt); } } if (inode) audit_copy_inode(found_child, dentry, inode, 0); else found_child->ino = AUDIT_INO_UNSET; } EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values * @ctx: audit_context for the task * @t: timespec64 to store time recorded in the audit_context * @serial: serial value that is recorded in the audit_context * * Also sets the context as auditable. */ int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { if (ctx->context == AUDIT_CTX_UNUSED) return 0; if (!ctx->serial) ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; if (!ctx->prio) { ctx->prio = 1; ctx->current_state = AUDIT_STATE_RECORD; } return 1; } /** * __audit_mq_open - record audit data for a POSIX MQ open * @oflag: open flag * @mode: mode bits * @attr: queue attributes * */ void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { struct audit_context *context = audit_context(); if (attr) memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr)); else memset(&context->mq_open.attr, 0, sizeof(struct mq_attr)); context->mq_open.oflag = oflag; context->mq_open.mode = mode; context->type = AUDIT_MQ_OPEN; } /** * __audit_mq_sendrecv - record audit data for a POSIX MQ timed send/receive * @mqdes: MQ descriptor * @msg_len: Message length * @msg_prio: Message priority * @abs_timeout: Message timeout in absolute time * */ void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { struct audit_context *context = audit_context(); struct timespec64 *p = &context->mq_sendrecv.abs_timeout; if (abs_timeout) memcpy(p, abs_timeout, sizeof(*p)); else memset(p, 0, sizeof(*p)); context->mq_sendrecv.mqdes = mqdes; context->mq_sendrecv.msg_len = msg_len; context->mq_sendrecv.msg_prio = msg_prio; context->type = AUDIT_MQ_SENDRECV; } /** * __audit_mq_notify - record audit data for a POSIX MQ notify * @mqdes: MQ descriptor * @notification: Notification event * */ void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { struct audit_context *context = audit_context(); if (notification) context->mq_notify.sigev_signo = notification->sigev_signo; else context->mq_notify.sigev_signo = 0; context->mq_notify.mqdes = mqdes; context->type = AUDIT_MQ_NOTIFY; } /** * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute * @mqdes: MQ descriptor * @mqstat: MQ flags * */ void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { struct audit_context *context = audit_context(); context->mq_getsetattr.mqdes = mqdes; context->mq_getsetattr.mqstat = *mqstat; context->type = AUDIT_MQ_GETSETATTR; } /** * __audit_ipc_obj - record audit data for ipc object * @ipcp: ipc permissions * */ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) { struct audit_context *context = audit_context(); context->ipc.uid = ipcp->uid; context->ipc.gid = ipcp->gid; context->ipc.mode = ipcp->mode; context->ipc.has_perm = 0; security_ipc_getsecid(ipcp, &context->ipc.osid); context->type = AUDIT_IPC; } /** * __audit_ipc_set_perm - record audit data for new ipc permissions * @qbytes: msgq bytes * @uid: msgq user id * @gid: msgq group id * @mode: msgq mode (permissions) * * Called only after audit_ipc_obj(). 
*/ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { struct audit_context *context = audit_context(); context->ipc.qbytes = qbytes; context->ipc.perm_uid = uid; context->ipc.perm_gid = gid; context->ipc.perm_mode = mode; context->ipc.has_perm = 1; } void __audit_bprm(struct linux_binprm *bprm) { struct audit_context *context = audit_context(); context->type = AUDIT_EXECVE; context->execve.argc = bprm->argc; } /** * __audit_socketcall - record audit data for sys_socketcall * @nargs: number of args, which should not be more than AUDITSC_ARGS. * @args: args array * */ int __audit_socketcall(int nargs, unsigned long *args) { struct audit_context *context = audit_context(); if (nargs <= 0 || nargs > AUDITSC_ARGS || !args) return -EINVAL; context->type = AUDIT_SOCKETCALL; context->socketcall.nargs = nargs; memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long)); return 0; } /** * __audit_fd_pair - record audit data for pipe and socketpair * @fd1: the first file descriptor * @fd2: the second file descriptor * */ void __audit_fd_pair(int fd1, int fd2) { struct audit_context *context = audit_context(); context->fds[0] = fd1; context->fds[1] = fd2; } /** * __audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto * @len: data length in user space * @a: data address in kernel space * * Returns 0 for success or NULL context or < 0 on error. */ int __audit_sockaddr(int len, void *a) { struct audit_context *context = audit_context(); if (!context->sockaddr) { void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL); if (!p) return -ENOMEM; context->sockaddr = p; } context->sockaddr_len = len; memcpy(context->sockaddr, a, len); return 0; } void __audit_ptrace(struct task_struct *t) { struct audit_context *context = audit_context(); context->target_pid = task_tgid_nr(t); context->target_auid = audit_get_loginuid(t); context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); security_task_getsecid_obj(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); } /** * audit_signal_info_syscall - record signal info for syscalls * @t: task being signaled * * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. 
*/ int audit_signal_info_syscall(struct task_struct *t) { struct audit_aux_data_pids *axp; struct audit_context *ctx = audit_context(); kuid_t t_uid = task_uid(t); if (!audit_signals || audit_dummy_context()) return 0; /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { ctx->target_pid = task_tgid_nr(t); ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid_obj(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); return 0; } axp = (void *)ctx->aux_pids; if (!axp || axp->pid_count == AUDIT_AUX_PIDS) { axp = kzalloc(sizeof(*axp), GFP_ATOMIC); if (!axp) return -ENOMEM; axp->d.type = AUDIT_OBJ_PID; axp->d.next = ctx->aux_pids; ctx->aux_pids = (void *)axp; } BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS); axp->target_pid[axp->pid_count] = task_tgid_nr(t); axp->target_auid[axp->pid_count] = audit_get_loginuid(t); axp->target_uid[axp->pid_count] = t_uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid_obj(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); axp->pid_count++; return 0; } /** * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps * @bprm: pointer to the bprm being processed * @new: the proposed new credentials * @old: the old credentials * * Simply check if the proc already has the caps given by the file and if not * store the priv escalation info for later auditing at the end of the syscall * * -Eric */ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = audit_context(); struct cpu_vfs_cap_data vcaps; ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) return -ENOMEM; ax->d.type = AUDIT_BPRM_FCAPS; ax->d.next = context->aux; context->aux = (void *)ax; get_vfs_caps_from_disk(&nop_mnt_idmap, bprm->file->f_path.dentry, &vcaps); ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); ax->fcap.rootid = vcaps.rootid; ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = old->cap_permitted; ax->old_pcap.inheritable = old->cap_inheritable; ax->old_pcap.effective = old->cap_effective; ax->old_pcap.ambient = old->cap_ambient; ax->new_pcap.permitted = new->cap_permitted; ax->new_pcap.inheritable = new->cap_inheritable; ax->new_pcap.effective = new->cap_effective; ax->new_pcap.ambient = new->cap_ambient; return 0; } /** * __audit_log_capset - store information about the arguments to the capset syscall * @new: the new credentials * @old: the old (current) credentials * * Record the arguments userspace sent to sys_capset for later printing by the * audit system if applicable */ void __audit_log_capset(const struct cred *new, const struct cred *old) { struct audit_context *context = audit_context(); context->capset.pid = task_tgid_nr(current); context->capset.cap.effective = new->cap_effective; context->capset.cap.inheritable = new->cap_effective; context->capset.cap.permitted = new->cap_permitted; context->capset.cap.ambient = new->cap_ambient; context->type = AUDIT_CAPSET; } void __audit_mmap_fd(int fd, int flags) { struct audit_context *context = audit_context(); context->mmap.fd = fd; context->mmap.flags = flags; context->type = AUDIT_MMAP; } void 
__audit_openat2_how(struct open_how *how) { struct audit_context *context = audit_context(); context->openat2.flags = how->flags; context->openat2.mode = how->mode; context->openat2.resolve = how->resolve; context->type = AUDIT_OPENAT2; } void __audit_log_kern_module(char *name) { struct audit_context *context = audit_context(); context->module.name = kstrdup(name, GFP_KERNEL); if (!context->module.name) audit_log_lost("out of memory in __audit_log_kern_module"); context->type = AUDIT_KERN_MODULE; } void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { /* {subj,obj}_trust values are {0,1,2}: no,yes,unknown */ switch (friar->hdr.type) { case FAN_RESPONSE_INFO_NONE: audit_log(audit_context(), GFP_KERNEL, AUDIT_FANOTIFY, "resp=%u fan_type=%u fan_info=0 subj_trust=2 obj_trust=2", response, FAN_RESPONSE_INFO_NONE); break; case FAN_RESPONSE_INFO_AUDIT_RULE: audit_log(audit_context(), GFP_KERNEL, AUDIT_FANOTIFY, "resp=%u fan_type=%u fan_info=%X subj_trust=%u obj_trust=%u", response, friar->hdr.type, friar->rule_number, friar->subj_trust, friar->obj_trust); } } void __audit_tk_injoffset(struct timespec64 offset) { struct audit_context *context = audit_context(); /* only set type if not already set by NTP */ if (!context->type) context->type = AUDIT_TIME_INJOFFSET; memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); } void __audit_ntp_log(const struct audit_ntp_data *ad) { struct audit_context *context = audit_context(); int type; for (type = 0; type < AUDIT_NTP_NVALS; type++) if (ad->vals[type].newval != ad->vals[type].oldval) { /* unconditionally set type, overwriting TK */ context->type = AUDIT_TIME_ADJNTPVAL; memcpy(&context->time.ntp_data, ad, sizeof(*ad)); break; } } void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { struct audit_buffer *ab; char comm[sizeof(current->comm)]; ab = audit_log_start(audit_context(), gfp, AUDIT_NETFILTER_CFG); if (!ab) return; audit_log_format(ab, "table=%s family=%u entries=%u op=%s", name, af, nentries, audit_nfcfgs[op].s); audit_log_format(ab, " pid=%u", task_pid_nr(current)); audit_log_task_context(ab); /* subj= */ audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_end(ab); } EXPORT_SYMBOL_GPL(__audit_log_nfcfg); static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; kgid_t gid; unsigned int sessionid; char comm[sizeof(current->comm)]; auid = audit_get_loginuid(current); sessionid = audit_get_sessionid(current); current_uid_gid(&uid, &gid); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", from_kuid(&init_user_ns, auid), from_kuid(&init_user_ns, uid), from_kgid(&init_user_ns, gid), sessionid); audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current)); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_d_path_exe(ab, current->mm); } /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value * * If a process ends with a core dump, something fishy is going on and we * should record the event for investigation. 
*/ void audit_core_dumps(long signr) { struct audit_buffer *ab; if (!audit_enabled) return; if (signr == SIGQUIT) /* don't care for those */ return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_ANOM_ABEND); if (unlikely(!ab)) return; audit_log_task(ab); audit_log_format(ab, " sig=%ld res=1", signr); audit_log_end(ab); } /** * audit_seccomp - record information about a seccomp action * @syscall: syscall number * @signr: signal value * @code: the seccomp action * * Record the information associated with a seccomp action. Event filtering for * seccomp actions that are not to be logged is done in seccomp_log(). * Therefore, this function forces auditing independent of the audit_enabled * and dummy context state because seccomp actions should be logged even when * audit is not in use. */ void audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_SECCOMP); if (unlikely(!ab)) return; audit_log_task(ab); audit_log_format(ab, " sig=%ld arch=%x syscall=%ld compat=%d ip=0x%lx code=0x%x", signr, syscall_get_arch(current), syscall, in_compat_syscall(), KSTK_EIP(current), code); audit_log_end(ab); } void audit_seccomp_actions_logged(const char *names, const char *old_names, int res) { struct audit_buffer *ab; if (!audit_enabled) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; audit_log_format(ab, "op=seccomp-logging actions=%s old-actions=%s res=%d", names, old_names, res); audit_log_end(ab); } struct list_head *audit_killed_trees(void) { struct audit_context *ctx = audit_context(); if (likely(!ctx || ctx->context == AUDIT_CTX_UNUSED)) return NULL; return &ctx->killed_trees; } |
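/*
 * Illustrative userspace sketch (not part of kernel/auditsc.c): the
 * __audit_mq_* hooks above only stash syscall arguments in the audit
 * context; the corresponding records are emitted at syscall exit.  The
 * program below performs the POSIX message-queue calls those hooks
 * instrument.  Assumptions: auditd is running and a rule along the lines
 * of "auditctl -a always,exit -S mq_open -S mq_timedsend" is loaded; the
 * queue name "/audit-demo" and the attribute values are arbitrary.
 * Build with:  cc -o mq-demo mq-demo.c -lrt
 */
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int main(void)
{
	struct mq_attr attr = {
		.mq_maxmsg  = 4,	/* recorded via __audit_mq_open() */
		.mq_msgsize = 64,
	};
	struct timespec ts;
	mqd_t mqd;

	mqd = mq_open("/audit-demo", O_CREAT | O_RDWR, 0600, &attr);
	if (mqd == (mqd_t)-1) {
		perror("mq_open");
		return EXIT_FAILURE;
	}

	/* absolute timeout: recorded via __audit_mq_sendrecv() */
	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += 1;
	if (mq_timedsend(mqd, "hello", strlen("hello"), 1, &ts) == -1)
		perror("mq_timedsend");

	/* recorded via __audit_mq_getsetattr() */
	if (mq_getattr(mqd, &attr) == 0)
		printf("queued messages: %ld\n", attr.mq_curmsgs);

	mq_close(mqd);
	mq_unlink("/audit-demo");
	return 0;
}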
// SPDX-License-Identifier: GPL-2.0-only /* * Generic show_mem() implementation * * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de> */ #include <linux/blkdev.h> #include <linux/cma.h> #include <linux/cpuset.h> #include <linux/highmem.h> #include <linux/hugetlb.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/swap.h> #include <linux/vmstat.h> #include "internal.h" #include "swap.h" atomic_long_t _totalram_pages __read_mostly; EXPORT_SYMBOL(_totalram_pages); unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; static inline void show_node(struct zone *zone) { if (IS_ENABLED(CONFIG_NUMA)) printk("Node %d ", zone_to_nid(zone)); } long si_mem_available(void) { long available; unsigned long pagecache; unsigned long wmark_low = 0; unsigned long reclaimable; struct zone *zone; for_each_zone(zone) wmark_low += low_wmark_pages(zone); /* * Estimate the amount of memory available for userspace allocations, * without causing swapping or OOM. */ available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages; /* * Not all the page cache can be freed, otherwise the system will * start swapping or thrashing. Assume at least half of the page * cache, or the low watermark worth of cache, needs to stay. */ pagecache = global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE); pagecache -= min(pagecache / 2, wmark_low); available += pagecache; /* * Part of the reclaimable slab and other kernel memory consists of * items that are in use, and cannot be freed. Cap this estimate at the * low watermark.
*/ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); reclaimable -= min(reclaimable / 2, wmark_low); available += reclaimable; if (available < 0) available = 0; return available; } EXPORT_SYMBOL_GPL(si_mem_available); void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages(); val->sharedram = global_node_page_state(NR_SHMEM); val->freeram = global_zone_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); val->totalhigh = totalhigh_pages(); val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; } EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { int zone_type; /* needs to be signed */ unsigned long managed_pages = 0; unsigned long managed_highpages = 0; unsigned long free_highpages = 0; pg_data_t *pgdat = NODE_DATA(nid); for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); val->totalram = managed_pages; val->sharedram = node_page_state(pgdat, NR_SHMEM); val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (is_highmem(zone)) { managed_highpages += zone_managed_pages(zone); free_highpages += zone_page_state(zone, NR_FREE_PAGES); } } val->totalhigh = managed_highpages; val->freehigh = free_highpages; #else val->totalhigh = managed_highpages; val->freehigh = free_highpages; #endif val->mem_unit = PAGE_SIZE; } #endif /* * Determine whether the node should be displayed or not, depending on whether * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) { if (!(flags & SHOW_MEM_FILTER_NODES)) return false; /* * no node mask - aka implicit memory numa policy. Do not bother with * the synchronization - read_mems_allowed_begin - because we do not * have to be precise here. */ if (!nodemask) nodemask = &cpuset_current_mems_allowed; return !node_isset(nid, *nodemask); } static void show_migration_types(unsigned char type) { static const char types[MIGRATE_TYPES] = { [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_MOVABLE] = 'M', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_HIGHATOMIC] = 'H', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif #ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = 'I', #endif }; char tmp[MIGRATE_TYPES + 1]; char *p = tmp; int i; for (i = 0; i < MIGRATE_TYPES; i++) { if (type & (1 << i)) *p++ = types[i]; } *p = '\0'; printk(KERN_CONT "(%s) ", tmp); } static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx) { int zone_idx; for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++) if (zone_managed_pages(pgdat->node_zones + zone_idx)) return true; return false; } /* * Show free area list (used inside shift_scroll-lock stuff) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. * * Bits in @filter: * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. 
*/ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long free_pcp = 0; int cpu, nid; struct zone *zone; pg_data_t *pgdat; for_each_populated_zone(zone) { if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; } printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu dirty:%lu writeback:%lu\n" " slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu\n" " sec_pagetables:%lu bounce:%lu\n" " kernel_misc_reclaimable:%lu\n" " free:%lu free_pcp:%lu free_cma:%lu\n", global_node_page_state(NR_ACTIVE_ANON), global_node_page_state(NR_INACTIVE_ANON), global_node_page_state(NR_ISOLATED_ANON), global_node_page_state(NR_ACTIVE_FILE), global_node_page_state(NR_INACTIVE_FILE), global_node_page_state(NR_ISOLATED_FILE), global_node_page_state(NR_UNEVICTABLE), global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B), global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_node_page_state(NR_PAGETABLE), global_node_page_state(NR_SECONDARY_PAGETABLE), global_zone_page_state(NR_BOUNCE), global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE), global_zone_page_state(NR_FREE_PAGES), free_pcp, global_zone_page_state(NR_FREE_CMA_PAGES)); for_each_online_pgdat(pgdat) { if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) continue; if (!node_has_managed_zones(pgdat, max_zone_idx)) continue; printk("Node %d" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " isolated(anon):%lukB" " isolated(file):%lukB" " mapped:%lukB" " dirty:%lukB" " writeback:%lukB" " shmem:%lukB" #ifdef CONFIG_TRANSPARENT_HUGEPAGE " shmem_thp:%lukB" " shmem_pmdmapped:%lukB" " anon_thp:%lukB" #endif " writeback_tmp:%lukB" " kernel_stack:%lukB" #ifdef CONFIG_SHADOW_CALL_STACK " shadow_call_stack:%lukB" #endif " pagetables:%lukB" " sec_pagetables:%lukB" " all_unreclaimable? %s" "\n", pgdat->node_id, K(node_page_state(pgdat, NR_ACTIVE_ANON)), K(node_page_state(pgdat, NR_INACTIVE_ANON)), K(node_page_state(pgdat, NR_ACTIVE_FILE)), K(node_page_state(pgdat, NR_INACTIVE_FILE)), K(node_page_state(pgdat, NR_UNEVICTABLE)), K(node_page_state(pgdat, NR_ISOLATED_ANON)), K(node_page_state(pgdat, NR_ISOLATED_FILE)), K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS)), K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), K(node_page_state(pgdat, NR_ANON_THPS)), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif K(node_page_state(pgdat, NR_PAGETABLE)), K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? 
"yes" : "no"); } for_each_populated_zone(zone) { int i; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; free_pcp = 0; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; show_node(zone); printk(KERN_CONT "%s" " free:%lukB" " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" " reserved_highatomic:%luKB" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " writepending:%lukB" " present:%lukB" " managed:%lukB" " mlocked:%lukB" " bounce:%lukB" " free_pcp:%lukB" " local_pcp:%ukB" " free_cma:%lukB" "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), K(zone->nr_reserved_highatomic), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), K(this_cpu_read(zone->per_cpu_pageset->count)), K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { unsigned int order; unsigned long nr[MAX_ORDER + 1], flags, total = 0; unsigned char types[MAX_ORDER + 1]; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; show_node(zone); printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order <= MAX_ORDER; order++) { struct free_area *area = &zone->free_area[order]; int type; nr[order] = area->nr_free; total += nr[order] << order; types[order] = 0; for (type = 0; type < MIGRATE_TYPES; type++) { if (!free_area_empty(area, type)) types[order] |= 1 << type; } } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order <= MAX_ORDER; order++) { printk(KERN_CONT "%lu*%lukB ", nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } printk(KERN_CONT "= %lukB\n", K(total)); } for_each_online_node(nid) { if (show_mem_node_skip(filter, nid, nodemask)) continue; hugetlb_show_meminfo_node(nid); } printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES)); show_swap_cache_info(); } void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long total = 0, reserved = 0, highmem = 0; struct zone *zone; printk("Mem-Info:\n"); show_free_areas(filter, nodemask, max_zone_idx); for_each_populated_zone(zone) { total += zone->present_pages; reserved += zone->present_pages - zone_managed_pages(zone); if (is_highmem(zone)) highmem += zone->present_pages; } printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); printk("%lu pages reserved\n", reserved); #ifdef CONFIG_CMA printk("%lu pages cma reserved\n", totalcma_pages); #endif #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif } |
// SPDX-License-Identifier: GPL-2.0-only /* * Texas Instruments' Bluetooth HCILL UART protocol * * HCILL (HCI Low Level) is a Texas Instruments' power management * protocol extension to H4. * * Copyright (C) 2007 Texas Instruments, Inc.
* * Written by Ohad Ben-Cohen <ohad@bencohen.org> * * Acknowledgements: * This file is based on hci_h4.c, which was written * by Maxim Krasnyansky and Marcel Holtmann. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/firmware.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/signal.h> #include <linux/ioctl.h> #include <linux/of.h> #include <linux/serdev.h> #include <linux/skbuff.h> #include <linux/ti_wilink_st.h> #include <linux/clk.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <linux/gpio/consumer.h> #include <linux/nvmem-consumer.h> #include "hci_uart.h" /* Vendor-specific HCI commands */ #define HCI_VS_WRITE_BD_ADDR 0xfc06 #define HCI_VS_UPDATE_UART_HCI_BAUDRATE 0xff36 /* HCILL commands */ #define HCILL_GO_TO_SLEEP_IND 0x30 #define HCILL_GO_TO_SLEEP_ACK 0x31 #define HCILL_WAKE_UP_IND 0x32 #define HCILL_WAKE_UP_ACK 0x33 /* HCILL states */ enum hcill_states_e { HCILL_ASLEEP, HCILL_ASLEEP_TO_AWAKE, HCILL_AWAKE, HCILL_AWAKE_TO_ASLEEP }; struct ll_device { struct hci_uart hu; struct serdev_device *serdev; struct gpio_desc *enable_gpio; struct clk *ext_clk; bdaddr_t bdaddr; }; struct ll_struct { struct sk_buff *rx_skb; struct sk_buff_head txq; spinlock_t hcill_lock; /* HCILL state lock */ unsigned long hcill_state; /* HCILL power state */ struct sk_buff_head tx_wait_q; /* HCILL wait queue */ }; /* * Builds and sends an HCILL command packet. * These are very simple packets with only 1 cmd byte */ static int send_hcill_cmd(u8 cmd, struct hci_uart *hu) { int err = 0; struct sk_buff *skb = NULL; struct ll_struct *ll = hu->priv; BT_DBG("hu %p cmd 0x%x", hu, cmd); /* allocate packet */ skb = bt_skb_alloc(1, GFP_ATOMIC); if (!skb) { BT_ERR("cannot allocate memory for HCILL packet"); err = -ENOMEM; goto out; } /* prepare packet */ skb_put_u8(skb, cmd); /* send packet */ skb_queue_tail(&ll->txq, skb); out: return err; } /* Initialize protocol */ static int ll_open(struct hci_uart *hu) { struct ll_struct *ll; BT_DBG("hu %p", hu); ll = kzalloc(sizeof(*ll), GFP_KERNEL); if (!ll) return -ENOMEM; skb_queue_head_init(&ll->txq); skb_queue_head_init(&ll->tx_wait_q); spin_lock_init(&ll->hcill_lock); ll->hcill_state = HCILL_AWAKE; hu->priv = ll; if (hu->serdev) { struct ll_device *lldev = serdev_device_get_drvdata(hu->serdev); if (!IS_ERR(lldev->ext_clk)) clk_prepare_enable(lldev->ext_clk); } return 0; } /* Flush protocol data */ static int ll_flush(struct hci_uart *hu) { struct ll_struct *ll = hu->priv; BT_DBG("hu %p", hu); skb_queue_purge(&ll->tx_wait_q); skb_queue_purge(&ll->txq); return 0; } /* Close protocol */ static int ll_close(struct hci_uart *hu) { struct ll_struct *ll = hu->priv; BT_DBG("hu %p", hu); skb_queue_purge(&ll->tx_wait_q); skb_queue_purge(&ll->txq); kfree_skb(ll->rx_skb); if (hu->serdev) { struct ll_device *lldev = serdev_device_get_drvdata(hu->serdev); gpiod_set_value_cansleep(lldev->enable_gpio, 0); clk_disable_unprepare(lldev->ext_clk); } hu->priv = NULL; kfree(ll); return 0; } /* * internal function, which does common work of the device wake up process: * 1. places all pending packets (waiting in tx_wait_q list) in txq list. * 2. changes internal state to HCILL_AWAKE. * Note: assumes that hcill_lock spinlock is taken, * shouldn't be called otherwise! 
*/ static void __ll_do_awake(struct ll_struct *ll) { struct sk_buff *skb = NULL; while ((skb = skb_dequeue(&ll->tx_wait_q))) skb_queue_tail(&ll->txq, skb); ll->hcill_state = HCILL_AWAKE; } /* * Called upon a wake-up-indication from the device */ static void ll_device_want_to_wakeup(struct hci_uart *hu) { unsigned long flags; struct ll_struct *ll = hu->priv; BT_DBG("hu %p", hu); /* lock hcill state */ spin_lock_irqsave(&ll->hcill_lock, flags); switch (ll->hcill_state) { case HCILL_ASLEEP_TO_AWAKE: /* * This state means that both the host and the BRF chip * have simultaneously sent a wake-up-indication packet. * Traditionally, in this case, receiving a wake-up-indication * was enough and an additional wake-up-ack wasn't needed. * This has changed with the BRF6350, which does require an * explicit wake-up-ack. Other BRF versions, which do not * require an explicit ack here, do accept it, thus it is * perfectly safe to always send one. */ BT_DBG("dual wake-up-indication"); fallthrough; case HCILL_ASLEEP: /* acknowledge device wake up */ if (send_hcill_cmd(HCILL_WAKE_UP_ACK, hu) < 0) { BT_ERR("cannot acknowledge device wake up"); goto out; } break; default: /* any other state is illegal */ BT_ERR("received HCILL_WAKE_UP_IND in state %ld", ll->hcill_state); break; } /* send pending packets and change state to HCILL_AWAKE */ __ll_do_awake(ll); out: spin_unlock_irqrestore(&ll->hcill_lock, flags); /* actually send the packets */ hci_uart_tx_wakeup(hu); } /* * Called upon a sleep-indication from the device */ static void ll_device_want_to_sleep(struct hci_uart *hu) { unsigned long flags; struct ll_struct *ll = hu->priv; BT_DBG("hu %p", hu); /* lock hcill state */ spin_lock_irqsave(&ll->hcill_lock, flags); /* sanity check */ if (ll->hcill_state != HCILL_AWAKE) BT_ERR("ERR: HCILL_GO_TO_SLEEP_IND in state %ld", ll->hcill_state); /* acknowledge device sleep */ if (send_hcill_cmd(HCILL_GO_TO_SLEEP_ACK, hu) < 0) { BT_ERR("cannot acknowledge device sleep"); goto out; } /* update state */ ll->hcill_state = HCILL_ASLEEP; out: spin_unlock_irqrestore(&ll->hcill_lock, flags); /* actually send the sleep ack packet */ hci_uart_tx_wakeup(hu); } /* * Called upon wake-up-acknowledgement from the device */ static void ll_device_woke_up(struct hci_uart *hu) { unsigned long flags; struct ll_struct *ll = hu->priv; BT_DBG("hu %p", hu); /* lock hcill state */ spin_lock_irqsave(&ll->hcill_lock, flags); /* sanity check */ if (ll->hcill_state != HCILL_ASLEEP_TO_AWAKE) BT_ERR("received HCILL_WAKE_UP_ACK in state %ld", ll->hcill_state); /* send pending packets and change state to HCILL_AWAKE */ __ll_do_awake(ll); spin_unlock_irqrestore(&ll->hcill_lock, flags); /* actually send the packets */ hci_uart_tx_wakeup(hu); } /* Enqueue frame for transmittion (padding, crc, etc) */ /* may be called from two simultaneous tasklets */ static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb) { unsigned long flags = 0; struct ll_struct *ll = hu->priv; BT_DBG("hu %p skb %p", hu, skb); /* Prepend skb with frame type */ memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); /* lock hcill state */ spin_lock_irqsave(&ll->hcill_lock, flags); /* act according to current state */ switch (ll->hcill_state) { case HCILL_AWAKE: BT_DBG("device awake, sending normally"); skb_queue_tail(&ll->txq, skb); break; case HCILL_ASLEEP: BT_DBG("device asleep, waking up and queueing packet"); /* save packet for later */ skb_queue_tail(&ll->tx_wait_q, skb); /* awake device */ if (send_hcill_cmd(HCILL_WAKE_UP_IND, hu) < 0) { BT_ERR("cannot wake up device"); 
break; } ll->hcill_state = HCILL_ASLEEP_TO_AWAKE; break; case HCILL_ASLEEP_TO_AWAKE: BT_DBG("device waking up, queueing packet"); /* transient state; just keep packet for later */ skb_queue_tail(&ll->tx_wait_q, skb); break; default: BT_ERR("illegal hcill state: %ld (losing packet)", ll->hcill_state); dev_kfree_skb_irq(skb); break; } spin_unlock_irqrestore(&ll->hcill_lock, flags); return 0; } static int ll_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); struct ll_struct *ll = hu->priv; switch (hci_skb_pkt_type(skb)) { case HCILL_GO_TO_SLEEP_IND: BT_DBG("HCILL_GO_TO_SLEEP_IND packet"); ll_device_want_to_sleep(hu); break; case HCILL_GO_TO_SLEEP_ACK: /* shouldn't happen */ bt_dev_err(hdev, "received HCILL_GO_TO_SLEEP_ACK in state %ld", ll->hcill_state); break; case HCILL_WAKE_UP_IND: BT_DBG("HCILL_WAKE_UP_IND packet"); ll_device_want_to_wakeup(hu); break; case HCILL_WAKE_UP_ACK: BT_DBG("HCILL_WAKE_UP_ACK packet"); ll_device_woke_up(hu); break; } kfree_skb(skb); return 0; } #define LL_RECV_SLEEP_IND \ .type = HCILL_GO_TO_SLEEP_IND, \ .hlen = 0, \ .loff = 0, \ .lsize = 0, \ .maxlen = 0 #define LL_RECV_SLEEP_ACK \ .type = HCILL_GO_TO_SLEEP_ACK, \ .hlen = 0, \ .loff = 0, \ .lsize = 0, \ .maxlen = 0 #define LL_RECV_WAKE_IND \ .type = HCILL_WAKE_UP_IND, \ .hlen = 0, \ .loff = 0, \ .lsize = 0, \ .maxlen = 0 #define LL_RECV_WAKE_ACK \ .type = HCILL_WAKE_UP_ACK, \ .hlen = 0, \ .loff = 0, \ .lsize = 0, \ .maxlen = 0 static const struct h4_recv_pkt ll_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { LL_RECV_SLEEP_IND, .recv = ll_recv_frame }, { LL_RECV_SLEEP_ACK, .recv = ll_recv_frame }, { LL_RECV_WAKE_IND, .recv = ll_recv_frame }, { LL_RECV_WAKE_ACK, .recv = ll_recv_frame }, }; /* Recv data */ static int ll_recv(struct hci_uart *hu, const void *data, int count) { struct ll_struct *ll = hu->priv; if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count, ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts)); if (IS_ERR(ll->rx_skb)) { int err = PTR_ERR(ll->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); ll->rx_skb = NULL; return err; } return count; } static struct sk_buff *ll_dequeue(struct hci_uart *hu) { struct ll_struct *ll = hu->priv; return skb_dequeue(&ll->txq); } #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) static int read_local_version(struct hci_dev *hdev) { int err = 0; unsigned short version = 0; struct sk_buff *skb; struct hci_rp_read_local_version *ver; skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Reading TI version information failed (%ld)", PTR_ERR(skb)); return PTR_ERR(skb); } if (skb->len != sizeof(*ver)) { err = -EILSEQ; goto out; } ver = (struct hci_rp_read_local_version *)skb->data; if (le16_to_cpu(ver->manufacturer) != 13) { err = -ENODEV; goto out; } version = le16_to_cpu(ver->lmp_subver); out: if (err) bt_dev_err(hdev, "Failed to read TI version info: %d", err); kfree_skb(skb); return err ? 
err : version; } static int send_command_from_firmware(struct ll_device *lldev, struct hci_command *cmd) { struct sk_buff *skb; if (cmd->opcode == HCI_VS_UPDATE_UART_HCI_BAUDRATE) { /* ignore remote change * baud rate HCI VS command */ bt_dev_warn(lldev->hu.hdev, "change remote baud rate command in firmware"); return 0; } if (cmd->prefix != 1) bt_dev_dbg(lldev->hu.hdev, "command type %d", cmd->prefix); skb = __hci_cmd_sync(lldev->hu.hdev, cmd->opcode, cmd->plen, &cmd->speed, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(lldev->hu.hdev, "send command failed"); return PTR_ERR(skb); } kfree_skb(skb); return 0; } /* * download_firmware - * internal function which parses through the .bts firmware * script file intreprets SEND, DELAY actions only as of now */ static int download_firmware(struct ll_device *lldev) { unsigned short chip, min_ver, maj_ver; int version, err, len; unsigned char *ptr, *action_ptr; unsigned char bts_scr_name[40]; /* 40 char long bts scr name? */ const struct firmware *fw; struct hci_command *cmd; version = read_local_version(lldev->hu.hdev); if (version < 0) return version; chip = (version & 0x7C00) >> 10; min_ver = (version & 0x007F); maj_ver = (version & 0x0380) >> 7; if (version & 0x8000) maj_ver |= 0x0008; snprintf(bts_scr_name, sizeof(bts_scr_name), "ti-connectivity/TIInit_%d.%d.%d.bts", chip, maj_ver, min_ver); err = request_firmware(&fw, bts_scr_name, &lldev->serdev->dev); if (err || !fw->data || !fw->size) { bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s", err, bts_scr_name); return -EINVAL; } ptr = (void *)fw->data; len = fw->size; /* bts_header to remove out magic number and * version */ ptr += sizeof(struct bts_header); len -= sizeof(struct bts_header); while (len > 0 && ptr) { bt_dev_dbg(lldev->hu.hdev, " action size %d, type %d ", ((struct bts_action *)ptr)->size, ((struct bts_action *)ptr)->type); action_ptr = &(((struct bts_action *)ptr)->data[0]); switch (((struct bts_action *)ptr)->type) { case ACTION_SEND_COMMAND: /* action send */ bt_dev_dbg(lldev->hu.hdev, "S"); cmd = (struct hci_command *)action_ptr; err = send_command_from_firmware(lldev, cmd); if (err) goto out_rel_fw; break; case ACTION_WAIT_EVENT: /* wait */ /* no need to wait as command was synchronous */ bt_dev_dbg(lldev->hu.hdev, "W"); break; case ACTION_DELAY: /* sleep */ bt_dev_info(lldev->hu.hdev, "sleep command in scr"); msleep(((struct bts_action_delay *)action_ptr)->msec); break; } len -= (sizeof(struct bts_action) + ((struct bts_action *)ptr)->size); ptr += sizeof(struct bts_action) + ((struct bts_action *)ptr)->size; } out_rel_fw: /* fw download complete */ release_firmware(fw); return err; } static int ll_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { bdaddr_t bdaddr_swapped; struct sk_buff *skb; /* HCI_VS_WRITE_BD_ADDR (at least on a CC2560A chip) expects the BD * address to be MSB first, but bdaddr_t has the convention of being * LSB first. 
*/ baswap(&bdaddr_swapped, bdaddr); skb = __hci_cmd_sync(hdev, HCI_VS_WRITE_BD_ADDR, sizeof(bdaddr_t), &bdaddr_swapped, HCI_INIT_TIMEOUT); if (!IS_ERR(skb)) kfree_skb(skb); return PTR_ERR_OR_ZERO(skb); } static int ll_setup(struct hci_uart *hu) { int err, retry = 3; struct ll_device *lldev; struct serdev_device *serdev = hu->serdev; u32 speed; if (!serdev) return 0; lldev = serdev_device_get_drvdata(serdev); hu->hdev->set_bdaddr = ll_set_bdaddr; serdev_device_set_flow_control(serdev, true); do { /* Reset the Bluetooth device */ gpiod_set_value_cansleep(lldev->enable_gpio, 0); msleep(5); gpiod_set_value_cansleep(lldev->enable_gpio, 1); mdelay(100); err = serdev_device_wait_for_cts(serdev, true, 200); if (err) { bt_dev_err(hu->hdev, "Failed to get CTS"); return err; } err = download_firmware(lldev); if (!err) break; /* Toggle BT_EN and retry */ bt_dev_err(hu->hdev, "download firmware failed, retrying..."); } while (retry--); if (err) return err; /* Set BD address if one was specified at probe */ if (!bacmp(&lldev->bdaddr, BDADDR_NONE)) { /* This means that there was an error getting the BD address * during probe, so mark the device as having a bad address. */ set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); } else if (bacmp(&lldev->bdaddr, BDADDR_ANY)) { err = ll_set_bdaddr(hu->hdev, &lldev->bdaddr); if (err) set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); } /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; else if (hu->proto->oper_speed) speed = hu->proto->oper_speed; else speed = 0; if (speed) { __le32 speed_le = cpu_to_le32(speed); struct sk_buff *skb; skb = __hci_cmd_sync(hu->hdev, HCI_VS_UPDATE_UART_HCI_BAUDRATE, sizeof(speed_le), &speed_le, HCI_INIT_TIMEOUT); if (!IS_ERR(skb)) { kfree_skb(skb); serdev_device_set_baudrate(serdev, speed); } } return 0; } static const struct hci_uart_proto llp; static int hci_ti_probe(struct serdev_device *serdev) { struct hci_uart *hu; struct ll_device *lldev; struct nvmem_cell *bdaddr_cell; u32 max_speed = 3000000; lldev = devm_kzalloc(&serdev->dev, sizeof(struct ll_device), GFP_KERNEL); if (!lldev) return -ENOMEM; hu = &lldev->hu; serdev_device_set_drvdata(serdev, lldev); lldev->serdev = hu->serdev = serdev; lldev->enable_gpio = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(lldev->enable_gpio)) return PTR_ERR(lldev->enable_gpio); lldev->ext_clk = devm_clk_get(&serdev->dev, "ext_clock"); if (IS_ERR(lldev->ext_clk) && PTR_ERR(lldev->ext_clk) != -ENOENT) return PTR_ERR(lldev->ext_clk); of_property_read_u32(serdev->dev.of_node, "max-speed", &max_speed); hci_uart_set_speeds(hu, 115200, max_speed); /* optional BD address from nvram */ bdaddr_cell = nvmem_cell_get(&serdev->dev, "bd-address"); if (IS_ERR(bdaddr_cell)) { int err = PTR_ERR(bdaddr_cell); if (err == -EPROBE_DEFER) return err; /* ENOENT means there is no matching nvmem cell and ENOSYS * means that nvmem is not enabled in the kernel configuration. */ if (err != -ENOENT && err != -ENOSYS) { /* If there was some other error, give userspace a * chance to fix the problem instead of failing to load * the driver. Using BDADDR_NONE as a flag that is * tested later in the setup function. 
*/ dev_warn(&serdev->dev, "Failed to get \"bd-address\" nvmem cell (%d)\n", err); bacpy(&lldev->bdaddr, BDADDR_NONE); } } else { bdaddr_t *bdaddr; size_t len; bdaddr = nvmem_cell_read(bdaddr_cell, &len); nvmem_cell_put(bdaddr_cell); if (IS_ERR(bdaddr)) { dev_err(&serdev->dev, "Failed to read nvmem bd-address\n"); return PTR_ERR(bdaddr); } if (len != sizeof(bdaddr_t)) { dev_err(&serdev->dev, "Invalid nvmem bd-address length\n"); kfree(bdaddr); return -EINVAL; } /* As per the device tree bindings, the value from nvmem is * expected to be MSB first, but in the kernel it is expected * that bdaddr_t is LSB first. */ baswap(&lldev->bdaddr, bdaddr); kfree(bdaddr); } return hci_uart_register_device(hu, &llp); } static void hci_ti_remove(struct serdev_device *serdev) { struct ll_device *lldev = serdev_device_get_drvdata(serdev); hci_uart_unregister_device(&lldev->hu); } static const struct of_device_id hci_ti_of_match[] = { { .compatible = "ti,cc2560" }, { .compatible = "ti,wl1271-st" }, { .compatible = "ti,wl1273-st" }, { .compatible = "ti,wl1281-st" }, { .compatible = "ti,wl1283-st" }, { .compatible = "ti,wl1285-st" }, { .compatible = "ti,wl1801-st" }, { .compatible = "ti,wl1805-st" }, { .compatible = "ti,wl1807-st" }, { .compatible = "ti,wl1831-st" }, { .compatible = "ti,wl1835-st" }, { .compatible = "ti,wl1837-st" }, {}, }; MODULE_DEVICE_TABLE(of, hci_ti_of_match); static struct serdev_device_driver hci_ti_drv = { .driver = { .name = "hci-ti", .of_match_table = hci_ti_of_match, }, .probe = hci_ti_probe, .remove = hci_ti_remove, }; #else #define ll_setup NULL #endif static const struct hci_uart_proto llp = { .id = HCI_UART_LL, .name = "LL", .setup = ll_setup, .open = ll_open, .close = ll_close, .recv = ll_recv, .enqueue = ll_enqueue, .dequeue = ll_dequeue, .flush = ll_flush, }; int __init ll_init(void) { serdev_device_driver_register(&hci_ti_drv); return hci_uart_register_proto(&llp); } int __exit ll_deinit(void) { serdev_device_driver_unregister(&hci_ti_drv); return hci_uart_unregister_proto(&llp); } |
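/*
 * Illustrative sketch (standalone C, not the driver above): hci_ll.c
 * implements TI's HCILL power management as a small state machine driven
 * by four one-byte opcodes (0x30-0x33).  The model below mirrors the
 * host-side transitions of ll_enqueue(), ll_device_want_to_sleep(),
 * ll_device_want_to_wakeup() and ll_device_woke_up(), with the tx queue
 * reduced to a pending-packet counter, locking omitted, and the unused
 * AWAKE_TO_ASLEEP state dropped.  Names such as struct hcill_model and
 * host_send() are invented for the example.
 * Build with:  cc -o hcill-model hcill-model.c
 */
#include <stdio.h>

/* opcodes auto-increment: 0x30, 0x31, 0x32, 0x33 as in hci_ll.c */
enum { GO_TO_SLEEP_IND = 0x30, GO_TO_SLEEP_ACK, WAKE_UP_IND, WAKE_UP_ACK };
enum state { ASLEEP, ASLEEP_TO_AWAKE, AWAKE };

struct hcill_model {
	enum state st;
	int queued;	/* packets waiting for the link to wake up */
};

/* host wants to transmit a packet (cf. ll_enqueue()) */
static void host_send(struct hcill_model *m)
{
	switch (m->st) {
	case AWAKE:
		printf("send packet directly\n");
		break;
	case ASLEEP:
		m->queued++;
		m->st = ASLEEP_TO_AWAKE;
		printf("queue packet, send WAKE_UP_IND (0x%02x)\n", WAKE_UP_IND);
		break;
	case ASLEEP_TO_AWAKE:
		m->queued++;
		printf("queue packet, wake-up already in progress\n");
		break;
	}
}

/* one-byte HCILL opcode received from the controller */
static void controller_event(struct hcill_model *m, int op)
{
	switch (op) {
	case GO_TO_SLEEP_IND:
		printf("ack sleep (0x%02x), state -> ASLEEP\n", GO_TO_SLEEP_ACK);
		m->st = ASLEEP;
		break;
	case WAKE_UP_IND:	/* device-initiated wake-up: always ack */
		printf("send WAKE_UP_ACK (0x%02x)\n", WAKE_UP_ACK);
		/* fall through */
	case WAKE_UP_ACK:	/* reply to our own WAKE_UP_IND */
		printf("flush %d queued packet(s), state -> AWAKE\n", m->queued);
		m->queued = 0;
		m->st = AWAKE;
		break;
	}
}

int main(void)
{
	struct hcill_model m = { .st = AWAKE };

	controller_event(&m, GO_TO_SLEEP_IND);	/* controller goes to sleep */
	host_send(&m);				/* queued + WAKE_UP_IND sent */
	host_send(&m);				/* queued, still waking up */
	controller_event(&m, WAKE_UP_ACK);	/* link awake, queue flushed */
	host_send(&m);				/* sent directly */
	return 0;
}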
/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ #define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ #include "main.h" #include <linux/kref.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/rtnetlink.h> int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node); bool batadv_softif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); void batadv_softif_vlan_release(struct kref *ref); struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid); /** * batadv_softif_vlan_put() - decrease the vlan object refcounter and * possibly release it * @vlan: the vlan object to release */ static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) { if (!vlan) return; kref_put(&vlan->refcount, batadv_softif_vlan_release); } #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
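/*
 * Illustrative userspace sketch (not batman-adv code): batadv_softif_vlan_put()
 * above is a NULL-safe kref_put() wrapper, so the vlan object is released
 * exactly once, by whichever holder drops the final reference.  The same
 * get/put pattern is reduced below to C11 atomics; struct vlan_obj and the
 * function names are invented for the example.
 * Build with:  cc -std=c11 -o refcount-demo refcount-demo.c
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct vlan_obj {
	unsigned short vid;
	atomic_uint refcount;
};

static struct vlan_obj *vlan_get(struct vlan_obj *vlan)
{
	atomic_fetch_add(&vlan->refcount, 1);
	return vlan;
}

/* mirrors batadv_softif_vlan_put(): NULL-safe, frees on last put */
static void vlan_put(struct vlan_obj *vlan)
{
	if (!vlan)
		return;
	if (atomic_fetch_sub(&vlan->refcount, 1) == 1) {
		printf("releasing vlan %u\n", vlan->vid);
		free(vlan);
	}
}

int main(void)
{
	struct vlan_obj *vlan = malloc(sizeof(*vlan));

	if (!vlan)
		return 1;
	vlan->vid = 42;
	atomic_init(&vlan->refcount, 1);	/* creator holds one reference */

	vlan_get(vlan);		/* second holder, e.g. a lookup */
	vlan_put(vlan);		/* lookup done */
	vlan_put(vlan);		/* creator done: object released here */
	return 0;
}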
// SPDX-License-Identifier: GPL-2.0-only /* * slip.c This module implements the SLIP protocol for kernel-based * devices like TTY. It interfaces between a raw TTY, and the * kernel's INET protocol layers. * * Version: @(#)slip.c 0.8.3 12/24/94 * * Authors: Laurence Culhane, <loz@holmes.demon.co.uk> * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org> * * Fixes: * Alan Cox : Sanity checks and avoid tx overruns. * Has a new sl->mtu field. * Alan Cox : Found cause of overrun. ifconfig sl0 * mtu upwards. Driver now spots this * and grows/shrinks its buffers(hack!). * Memory leak if you run out of memory * setting up a slip driver fixed. * Matt Dillon : Printable slip (borrowed from NET2E) * Pauline Middelink : Slip driver fixes. * Alan Cox : Honours the old SL_COMPRESSED flag * Alan Cox : KISS AX.25 and AXUI IP support * Michael Riepe : Automatic CSLIP recognition added * Charles Hedrick : CSLIP header length problem fix.
* Alan Cox : Corrected non-IP cases of the above. * Alan Cox : Now uses hardware type as per FvK. * Alan Cox : Default to 192.168.0.0 (RFC 1597) * A.N.Kuznetsov : dev_tint() recursion fix. * Dmitry Gorodchanin : SLIP memory leaks * Dmitry Gorodchanin : Code cleanup. Reduce tty driver * buffering from 4096 to 256 bytes. * Improving SLIP response time. * CONFIG_SLIP_MODE_SLIP6. * ifconfig sl? up & down now works * correctly. * Modularization. * Alan Cox : Oops - fix AX.25 buffer lengths * Dmitry Gorodchanin : Even more cleanups. Preserve CSLIP * statistics. Include CSLIP code only * if it really needed. * Alan Cox : Free slhc buffers in the right place. * Alan Cox : Allow for digipeated IP over AX.25 * Matti Aarnio : Dynamic SLIP devices, with ideas taken * from Jim Freeman's <jfree@caldera.com> * dynamic PPP devices. We do NOT kfree() * device entries, just reg./unreg. them * as they are needed. We kfree() them * at module cleanup. * With MODULE-loading ``insmod'', user * can issue parameter: slip_maxdev=1024 * (Or how much he/she wants.. Default * is 256) * Stanislav Voronyi : Slip line checking, with ideas taken * from multislip BSDI driver which was * written by Igor Chechik, RELCOM Corp. * Only algorithms have been ported to * Linux SLIP driver. * Vitaly E. Lavrov : Sane behaviour on tty hangup. * Alexey Kuznetsov : Cleanup interfaces to tty & netdevice * modules. */ #define SL_CHECK_TRANSMIT #include <linux/compat.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/if_arp.h> #include <linux/if_slip.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/workqueue.h> #include "slip.h" #ifdef CONFIG_INET #include <linux/ip.h> #include <linux/tcp.h> #include <net/slhc_vj.h> #endif #define SLIP_VERSION "0.8.4-NET3.019-NEWTTY" static struct net_device **slip_devs; static int slip_maxdev = SL_NRUNIT; module_param(slip_maxdev, int, 0); MODULE_PARM_DESC(slip_maxdev, "Maximum number of slip devices"); static int slip_esc(unsigned char *p, unsigned char *d, int len); static void slip_unesc(struct slip *sl, unsigned char c); #ifdef CONFIG_SLIP_MODE_SLIP6 static int slip_esc6(unsigned char *p, unsigned char *d, int len); static void slip_unesc6(struct slip *sl, unsigned char c); #endif #ifdef CONFIG_SLIP_SMART static void sl_keepalive(struct timer_list *t); static void sl_outfill(struct timer_list *t); static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd); #endif /******************************** * Buffer administration routines: * sl_alloc_bufs() * sl_free_bufs() * sl_realloc_bufs() * * NOTE: sl_realloc_bufs != sl_free_bufs + sl_alloc_bufs, because * sl_realloc_bufs provides strong atomicity and reallocation * on actively running device. *********************************/ /* Allocate channel buffers. */ static int sl_alloc_bufs(struct slip *sl, int mtu) { int err = -ENOBUFS; unsigned long len; char *rbuff = NULL; char *xbuff = NULL; #ifdef SL_INCLUDE_CSLIP char *cbuff = NULL; struct slcompress *slcomp = NULL; #endif /* * Allocate the SLIP frame buffers: * * rbuff Receive buffer. * xbuff Transmit buffer. * cbuff Temporary compression buffer. 
*/ len = mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; rbuff = kmalloc(len + 4, GFP_KERNEL); if (rbuff == NULL) goto err_exit; xbuff = kmalloc(len + 4, GFP_KERNEL); if (xbuff == NULL) goto err_exit; #ifdef SL_INCLUDE_CSLIP cbuff = kmalloc(len + 4, GFP_KERNEL); if (cbuff == NULL) goto err_exit; slcomp = slhc_init(16, 16); if (IS_ERR(slcomp)) goto err_exit; #endif spin_lock_bh(&sl->lock); if (sl->tty == NULL) { spin_unlock_bh(&sl->lock); err = -ENODEV; goto err_exit; } sl->mtu = mtu; sl->buffsize = len; sl->rcount = 0; sl->xleft = 0; rbuff = xchg(&sl->rbuff, rbuff); xbuff = xchg(&sl->xbuff, xbuff); #ifdef SL_INCLUDE_CSLIP cbuff = xchg(&sl->cbuff, cbuff); slcomp = xchg(&sl->slcomp, slcomp); #endif #ifdef CONFIG_SLIP_MODE_SLIP6 sl->xdata = 0; sl->xbits = 0; #endif spin_unlock_bh(&sl->lock); err = 0; /* Cleanup */ err_exit: #ifdef SL_INCLUDE_CSLIP kfree(cbuff); slhc_free(slcomp); #endif kfree(xbuff); kfree(rbuff); return err; } /* Free a SLIP channel buffers. */ static void sl_free_bufs(struct slip *sl) { /* Free all SLIP frame buffers. */ kfree(xchg(&sl->rbuff, NULL)); kfree(xchg(&sl->xbuff, NULL)); #ifdef SL_INCLUDE_CSLIP kfree(xchg(&sl->cbuff, NULL)); slhc_free(xchg(&sl->slcomp, NULL)); #endif } /* Reallocate slip channel buffers. */ static int sl_realloc_bufs(struct slip *sl, int mtu) { int err = 0; struct net_device *dev = sl->dev; unsigned char *xbuff, *rbuff; #ifdef SL_INCLUDE_CSLIP unsigned char *cbuff; #endif int len = mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); #ifdef SL_INCLUDE_CSLIP cbuff = kmalloc(len + 4, GFP_ATOMIC); #endif #ifdef SL_INCLUDE_CSLIP if (xbuff == NULL || rbuff == NULL || cbuff == NULL) { #else if (xbuff == NULL || rbuff == NULL) { #endif if (mtu > sl->mtu) { printk(KERN_WARNING "%s: unable to grow slip buffers, MTU change cancelled.\n", dev->name); err = -ENOBUFS; } goto done; } spin_lock_bh(&sl->lock); err = -ENODEV; if (sl->tty == NULL) goto done_on_bh; xbuff = xchg(&sl->xbuff, xbuff); rbuff = xchg(&sl->rbuff, rbuff); #ifdef SL_INCLUDE_CSLIP cbuff = xchg(&sl->cbuff, cbuff); #endif if (sl->xleft) { if (sl->xleft <= len) { memcpy(sl->xbuff, sl->xhead, sl->xleft); } else { sl->xleft = 0; dev->stats.tx_dropped++; } } sl->xhead = sl->xbuff; if (sl->rcount) { if (sl->rcount <= len) { memcpy(sl->rbuff, rbuff, sl->rcount); } else { sl->rcount = 0; dev->stats.rx_over_errors++; set_bit(SLF_ERROR, &sl->flags); } } sl->mtu = mtu; dev->mtu = mtu; sl->buffsize = len; err = 0; done_on_bh: spin_unlock_bh(&sl->lock); done: kfree(xbuff); kfree(rbuff); #ifdef SL_INCLUDE_CSLIP kfree(cbuff); #endif return err; } /* Set the "sending" flag. This must be atomic hence the set_bit. */ static inline void sl_lock(struct slip *sl) { netif_stop_queue(sl->dev); } /* Clear the "sending" flag. This must be atomic, hence the ASM. */ static inline void sl_unlock(struct slip *sl) { netif_wake_queue(sl->dev); } /* Send one completely decapsulated IP datagram to the IP layer. 
*/ static void sl_bump(struct slip *sl) { struct net_device *dev = sl->dev; struct sk_buff *skb; int count; count = sl->rcount; #ifdef SL_INCLUDE_CSLIP if (sl->mode & (SL_MODE_ADAPTIVE | SL_MODE_CSLIP)) { unsigned char c = sl->rbuff[0]; if (c & SL_TYPE_COMPRESSED_TCP) { /* ignore compressed packets when CSLIP is off */ if (!(sl->mode & SL_MODE_CSLIP)) { printk(KERN_WARNING "%s: compressed packet ignored\n", dev->name); return; } /* make sure we've reserved enough space for uncompress to use */ if (count + 80 > sl->buffsize) { dev->stats.rx_over_errors++; return; } count = slhc_uncompress(sl->slcomp, sl->rbuff, count); if (count <= 0) return; } else if (c >= SL_TYPE_UNCOMPRESSED_TCP) { if (!(sl->mode & SL_MODE_CSLIP)) { /* turn on header compression */ sl->mode |= SL_MODE_CSLIP; sl->mode &= ~SL_MODE_ADAPTIVE; printk(KERN_INFO "%s: header compression turned on\n", dev->name); } sl->rbuff[0] &= 0x4f; if (slhc_remember(sl->slcomp, sl->rbuff, count) <= 0) return; } } #endif /* SL_INCLUDE_CSLIP */ dev->stats.rx_bytes += count; skb = dev_alloc_skb(count); if (skb == NULL) { printk(KERN_WARNING "%s: memory squeeze, dropping packet.\n", dev->name); dev->stats.rx_dropped++; return; } skb->dev = dev; skb_put_data(skb, sl->rbuff, count); skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IP); netif_rx(skb); dev->stats.rx_packets++; } /* Encapsulate one IP datagram and stuff into a TTY queue. */ static void sl_encaps(struct slip *sl, unsigned char *icp, int len) { unsigned char *p; int actual, count; if (len > sl->mtu) { /* Sigh, shouldn't occur BUT ... */ printk(KERN_WARNING "%s: truncating oversized transmit packet!\n", sl->dev->name); sl->dev->stats.tx_dropped++; sl_unlock(sl); return; } p = icp; #ifdef SL_INCLUDE_CSLIP if (sl->mode & SL_MODE_CSLIP) len = slhc_compress(sl->slcomp, p, len, sl->cbuff, &p, 1); #endif #ifdef CONFIG_SLIP_MODE_SLIP6 if (sl->mode & SL_MODE_SLIP6) count = slip_esc6(p, sl->xbuff, len); else #endif count = slip_esc(p, sl->xbuff, len); /* Order of next two lines is *very* important. * When we are sending a little amount of data, * the transfer may be completed inside the ops->write() * routine, because it's running with interrupts enabled. * In this case we *never* got WRITE_WAKEUP event, * if we did not request it before write operation. * 14 Oct 1994 Dmitry Gorodchanin. */ set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); actual = sl->tty->ops->write(sl->tty, sl->xbuff, count); #ifdef SL_CHECK_TRANSMIT netif_trans_update(sl->dev); #endif sl->xleft = count - actual; sl->xhead = sl->xbuff + actual; #ifdef CONFIG_SLIP_SMART /* VSV */ clear_bit(SLF_OUTWAIT, &sl->flags); /* reset outfill flag */ #endif } /* Write out any remaining transmit buffer. Scheduled when tty is writable */ static void slip_transmit(struct work_struct *work) { struct slip *sl = container_of(work, struct slip, tx_work); int actual; spin_lock_bh(&sl->lock); /* First make sure we're connected. */ if (!sl->tty || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) { spin_unlock_bh(&sl->lock); return; } if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); spin_unlock_bh(&sl->lock); sl_unlock(sl); return; } actual = sl->tty->ops->write(sl->tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; spin_unlock_bh(&sl->lock); } /* * Called by the driver when there's room for more data. * Schedule the transmit. 
*/ static void slip_write_wakeup(struct tty_struct *tty) { struct slip *sl; rcu_read_lock(); sl = rcu_dereference(tty->disc_data); if (sl) schedule_work(&sl->tx_work); rcu_read_unlock(); } static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct slip *sl = netdev_priv(dev); spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? * 14 Oct 1994 Dmitry Gorodchanin. */ #ifdef SL_CHECK_TRANSMIT if (time_before(jiffies, dev_trans_start(dev) + 20 * HZ)) { /* 20 sec timeout not reached */ goto out; } printk(KERN_WARNING "%s: transmit timed out, %s?\n", dev->name, (tty_chars_in_buffer(sl->tty) || sl->xleft) ? "bad line quality" : "driver error"); sl->xleft = 0; clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); sl_unlock(sl); #endif } out: spin_unlock(&sl->lock); } /* Encapsulate an IP datagram and kick it into a TTY queue. */ static netdev_tx_t sl_xmit(struct sk_buff *skb, struct net_device *dev) { struct slip *sl = netdev_priv(dev); spin_lock(&sl->lock); if (!netif_running(dev)) { spin_unlock(&sl->lock); printk(KERN_WARNING "%s: xmit call when iface is down\n", dev->name); dev_kfree_skb(skb); return NETDEV_TX_OK; } if (sl->tty == NULL) { spin_unlock(&sl->lock); dev_kfree_skb(skb); return NETDEV_TX_OK; } sl_lock(sl); dev->stats.tx_bytes += skb->len; sl_encaps(sl, skb->data, skb->len); spin_unlock(&sl->lock); dev_kfree_skb(skb); return NETDEV_TX_OK; } /****************************************** * Routines looking at netdevice side. ******************************************/ /* Netdevice UP -> DOWN routine */ static int sl_close(struct net_device *dev) { struct slip *sl = netdev_priv(dev); spin_lock_bh(&sl->lock); if (sl->tty) /* TTY discipline is running. */ clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; spin_unlock_bh(&sl->lock); return 0; } /* Netdevice DOWN -> UP routine */ static int sl_open(struct net_device *dev) { struct slip *sl = netdev_priv(dev); if (sl->tty == NULL) return -ENODEV; sl->flags &= (1 << SLF_INUSE); netif_start_queue(dev); return 0; } /* Netdevice change MTU request */ static int sl_change_mtu(struct net_device *dev, int new_mtu) { struct slip *sl = netdev_priv(dev); return sl_realloc_bufs(sl, new_mtu); } /* Netdevice get statistics request */ static void sl_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct net_device_stats *devstats = &dev->stats; #ifdef SL_INCLUDE_CSLIP struct slip *sl = netdev_priv(dev); struct slcompress *comp = sl->slcomp; #endif stats->rx_packets = devstats->rx_packets; stats->tx_packets = devstats->tx_packets; stats->rx_bytes = devstats->rx_bytes; stats->tx_bytes = devstats->tx_bytes; stats->rx_dropped = devstats->rx_dropped; stats->tx_dropped = devstats->tx_dropped; stats->tx_errors = devstats->tx_errors; stats->rx_errors = devstats->rx_errors; stats->rx_over_errors = devstats->rx_over_errors; #ifdef SL_INCLUDE_CSLIP if (comp) { /* Generic compressed statistics */ stats->rx_compressed = comp->sls_i_compressed; stats->tx_compressed = comp->sls_o_compressed; /* Are we really still needs this? */ stats->rx_fifo_errors += comp->sls_i_compressed; stats->rx_dropped += comp->sls_i_tossed; stats->tx_fifo_errors += comp->sls_o_compressed; stats->collisions += comp->sls_o_misses; } #endif } /* Netdevice register callback */ static int sl_init(struct net_device *dev) { struct slip *sl = netdev_priv(dev); /* * Finish setting up the DEVICE info. 
*/ dev->mtu = sl->mtu; dev->type = ARPHRD_SLIP + sl->mode; #ifdef SL_CHECK_TRANSMIT dev->watchdog_timeo = 20*HZ; #endif return 0; } static void sl_uninit(struct net_device *dev) { struct slip *sl = netdev_priv(dev); sl_free_bufs(sl); } /* Hook the destructor so we can free slip devices at the right point in time */ static void sl_free_netdev(struct net_device *dev) { int i = dev->base_addr; slip_devs[i] = NULL; } static const struct net_device_ops sl_netdev_ops = { .ndo_init = sl_init, .ndo_uninit = sl_uninit, .ndo_open = sl_open, .ndo_stop = sl_close, .ndo_start_xmit = sl_xmit, .ndo_get_stats64 = sl_get_stats64, .ndo_change_mtu = sl_change_mtu, .ndo_tx_timeout = sl_tx_timeout, #ifdef CONFIG_SLIP_SMART .ndo_siocdevprivate = sl_siocdevprivate, #endif }; static void sl_setup(struct net_device *dev) { dev->netdev_ops = &sl_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = sl_free_netdev; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; /* MTU range: 68 - 65534 */ dev->min_mtu = 68; dev->max_mtu = 65534; /* New-style flags. */ dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; } /****************************************** Routines looking at TTY side. ******************************************/ /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when * a block of SLIP data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. This will not * be re-entered while running but other ldisc functions may be called * in parallel */ static void slip_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct slip *sl = tty->disc_data; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; cp++; continue; } #ifdef CONFIG_SLIP_MODE_SLIP6 if (sl->mode & SL_MODE_SLIP6) slip_unesc6(sl, *cp++); else #endif slip_unesc(sl, *cp++); } } /************************************ * slip_open helper routines. ************************************/ /* Collect hanged up channels */ static void sl_sync(void) { int i; struct net_device *dev; struct slip *sl; for (i = 0; i < slip_maxdev; i++) { dev = slip_devs[i]; if (dev == NULL) break; sl = netdev_priv(dev); if (sl->tty || sl->leased) continue; if (dev->flags & IFF_UP) dev_close(dev); } } /* Find a free SLIP channel, and link in this `tty' line. */ static struct slip *sl_alloc(void) { int i; char name[IFNAMSIZ]; struct net_device *dev = NULL; struct slip *sl; for (i = 0; i < slip_maxdev; i++) { dev = slip_devs[i]; if (dev == NULL) break; } /* Sorry, too many, all slots in use */ if (i >= slip_maxdev) return NULL; sprintf(name, "sl%d", i); dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, sl_setup); if (!dev) return NULL; dev->base_addr = i; sl = netdev_priv(dev); /* Initialize channel control data */ sl->magic = SLIP_MAGIC; sl->dev = dev; spin_lock_init(&sl->lock); INIT_WORK(&sl->tx_work, slip_transmit); sl->mode = SL_MODE_DEFAULT; #ifdef CONFIG_SLIP_SMART /* initialize timer_list struct */ timer_setup(&sl->keepalive_timer, sl_keepalive, 0); timer_setup(&sl->outfill_timer, sl_outfill, 0); #endif slip_devs[i] = dev; return sl; } /* * Open the high-level part of the SLIP channel. * This function is called by the TTY module when the * SLIP line discipline is called for. 
Because we are * sure the tty line exists, we only have to link it to * a free SLIP channel... * * Called in process context serialized from other ldisc calls. */ static int slip_open(struct tty_struct *tty) { struct slip *sl; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EOPNOTSUPP; /* RTnetlink lock is misused here to serialize concurrent opens of slip channels. There are better ways, but it is the simplest one. */ rtnl_lock(); /* Collect hanged up channels. */ sl_sync(); sl = tty->disc_data; err = -EEXIST; /* First make sure we're not already connected. */ if (sl && sl->magic == SLIP_MAGIC) goto err_exit; /* OK. Find a free SLIP channel to use. */ err = -ENFILE; sl = sl_alloc(); if (sl == NULL) goto err_exit; sl->tty = tty; tty->disc_data = sl; sl->pid = current->pid; if (!test_bit(SLF_INUSE, &sl->flags)) { /* Perform the low-level SLIP initialization. */ err = sl_alloc_bufs(sl, SL_MTU); if (err) goto err_free_chan; set_bit(SLF_INUSE, &sl->flags); err = register_netdevice(sl->dev); if (err) goto err_free_bufs; } #ifdef CONFIG_SLIP_SMART if (sl->keepalive) { sl->keepalive_timer.expires = jiffies + sl->keepalive * HZ; add_timer(&sl->keepalive_timer); } if (sl->outfill) { sl->outfill_timer.expires = jiffies + sl->outfill * HZ; add_timer(&sl->outfill_timer); } #endif /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ /* TTY layer expects 0 on success */ return 0; err_free_bufs: sl_free_bufs(sl); err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); /* do not call free_netdev before rtnl_unlock */ rtnl_unlock(); free_netdev(sl->dev); return err; err_exit: rtnl_unlock(); /* Count references from TTY module */ return err; } /* * Close down a SLIP channel. * This means flushing out any pending queues, and then returning. This * call is serialized against other ldisc functions. * * We also use this method fo a hangup event */ static void slip_close(struct tty_struct *tty) { struct slip *sl = tty->disc_data; /* First make sure we're connected. */ if (!sl || sl->magic != SLIP_MAGIC || sl->tty != tty) return; spin_lock_bh(&sl->lock); rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); synchronize_rcu(); flush_work(&sl->tx_work); /* VSV = very important to remove timers */ #ifdef CONFIG_SLIP_SMART del_timer_sync(&sl->keepalive_timer); del_timer_sync(&sl->outfill_timer); #endif /* Flush network side */ unregister_netdev(sl->dev); /* This will complete via sl_free_netdev */ } static void slip_hangup(struct tty_struct *tty) { slip_close(tty); } /************************************************************************ * STANDARD SLIP ENCAPSULATION * ************************************************************************/ static int slip_esc(unsigned char *s, unsigned char *d, int len) { unsigned char *ptr = d; unsigned char c; /* * Send an initial END character to flush out any * data that may have accumulated in the receiver * due to line noise. */ *ptr++ = END; /* * For each byte in the packet, send the appropriate * character sequence, according to the SLIP protocol. 
*/ while (len-- > 0) { switch (c = *s++) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } static void slip_unesc(struct slip *sl, unsigned char s) { switch (s) { case END: #ifdef CONFIG_SLIP_SMART /* drop keeptest bit = VSV */ if (test_bit(SLF_KEEPTEST, &sl->flags)) clear_bit(SLF_KEEPTEST, &sl->flags); #endif if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && (sl->rcount > 2)) sl_bump(sl); clear_bit(SLF_ESCAPE, &sl->flags); sl->rcount = 0; return; case ESC: set_bit(SLF_ESCAPE, &sl->flags); return; case ESC_ESC: if (test_and_clear_bit(SLF_ESCAPE, &sl->flags)) s = ESC; break; case ESC_END: if (test_and_clear_bit(SLF_ESCAPE, &sl->flags)) s = END; break; } if (!test_bit(SLF_ERROR, &sl->flags)) { if (sl->rcount < sl->buffsize) { sl->rbuff[sl->rcount++] = s; return; } sl->dev->stats.rx_over_errors++; set_bit(SLF_ERROR, &sl->flags); } } #ifdef CONFIG_SLIP_MODE_SLIP6 /************************************************************************ * 6 BIT SLIP ENCAPSULATION * ************************************************************************/ static int slip_esc6(unsigned char *s, unsigned char *d, int len) { unsigned char *ptr = d; unsigned char c; int i; unsigned short v = 0; short bits = 0; /* * Send an initial END character to flush out any * data that may have accumulated in the receiver * due to line noise. */ *ptr++ = 0x70; /* * Encode the packet into printable ascii characters */ for (i = 0; i < len; ++i) { v = (v << 8) | s[i]; bits += 8; while (bits >= 6) { bits -= 6; c = 0x30 + ((v >> bits) & 0x3F); *ptr++ = c; } } if (bits) { c = 0x30 + ((v << (6 - bits)) & 0x3F); *ptr++ = c; } *ptr++ = 0x70; return ptr - d; } static void slip_unesc6(struct slip *sl, unsigned char s) { unsigned char c; if (s == 0x70) { #ifdef CONFIG_SLIP_SMART /* drop keeptest bit = VSV */ if (test_bit(SLF_KEEPTEST, &sl->flags)) clear_bit(SLF_KEEPTEST, &sl->flags); #endif if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && (sl->rcount > 2)) sl_bump(sl); sl->rcount = 0; sl->xbits = 0; sl->xdata = 0; } else if (s >= 0x30 && s < 0x70) { sl->xdata = (sl->xdata << 6) | ((s - 0x30) & 0x3F); sl->xbits += 6; if (sl->xbits >= 8) { sl->xbits -= 8; c = (unsigned char)(sl->xdata >> sl->xbits); if (!test_bit(SLF_ERROR, &sl->flags)) { if (sl->rcount < sl->buffsize) { sl->rbuff[sl->rcount++] = c; return; } sl->dev->stats.rx_over_errors++; set_bit(SLF_ERROR, &sl->flags); } } } } #endif /* CONFIG_SLIP_MODE_SLIP6 */ /* Perform I/O control on an active SLIP channel. */ static int slip_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct slip *sl = tty->disc_data; unsigned int tmp; int __user *p = (int __user *)arg; /* First make sure we're connected. 
*/ if (!sl || sl->magic != SLIP_MAGIC) return -EINVAL; switch (cmd) { case SIOCGIFNAME: tmp = strlen(sl->dev->name) + 1; if (copy_to_user((void __user *)arg, sl->dev->name, tmp)) return -EFAULT; return 0; case SIOCGIFENCAP: if (put_user(sl->mode, p)) return -EFAULT; return 0; case SIOCSIFENCAP: if (get_user(tmp, p)) return -EFAULT; #ifndef SL_INCLUDE_CSLIP if (tmp & (SL_MODE_CSLIP|SL_MODE_ADAPTIVE)) return -EINVAL; #else if ((tmp & (SL_MODE_ADAPTIVE | SL_MODE_CSLIP)) == (SL_MODE_ADAPTIVE | SL_MODE_CSLIP)) /* return -EINVAL; */ tmp &= ~SL_MODE_ADAPTIVE; #endif #ifndef CONFIG_SLIP_MODE_SLIP6 if (tmp & SL_MODE_SLIP6) return -EINVAL; #endif sl->mode = tmp; sl->dev->type = ARPHRD_SLIP + sl->mode; return 0; case SIOCSIFHWADDR: return -EINVAL; #ifdef CONFIG_SLIP_SMART /* VSV changes start here */ case SIOCSKEEPALIVE: if (get_user(tmp, p)) return -EFAULT; if (tmp > 255) /* max for unchar */ return -EINVAL; spin_lock_bh(&sl->lock); if (!sl->tty) { spin_unlock_bh(&sl->lock); return -ENODEV; } sl->keepalive = (u8)tmp; if (sl->keepalive != 0) { mod_timer(&sl->keepalive_timer, jiffies + sl->keepalive * HZ); set_bit(SLF_KEEPTEST, &sl->flags); } else del_timer(&sl->keepalive_timer); spin_unlock_bh(&sl->lock); return 0; case SIOCGKEEPALIVE: if (put_user(sl->keepalive, p)) return -EFAULT; return 0; case SIOCSOUTFILL: if (get_user(tmp, p)) return -EFAULT; if (tmp > 255) /* max for unchar */ return -EINVAL; spin_lock_bh(&sl->lock); if (!sl->tty) { spin_unlock_bh(&sl->lock); return -ENODEV; } sl->outfill = (u8)tmp; if (sl->outfill != 0) { mod_timer(&sl->outfill_timer, jiffies + sl->outfill * HZ); set_bit(SLF_OUTWAIT, &sl->flags); } else del_timer(&sl->outfill_timer); spin_unlock_bh(&sl->lock); return 0; case SIOCGOUTFILL: if (put_user(sl->outfill, p)) return -EFAULT; return 0; /* VSV changes end */ #endif default: return tty_mode_ioctl(tty, cmd, arg); } } /* VSV changes start here */ #ifdef CONFIG_SLIP_SMART /* function sl_siocdevprivate called from net/core/dev.c to allow get/set outfill/keepalive parameter by ifconfig */ static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct slip *sl = netdev_priv(dev); unsigned long *p = (unsigned long *)&rq->ifr_ifru; if (sl == NULL) /* Allocation failed ?? */ return -ENODEV; if (in_compat_syscall()) return -EOPNOTSUPP; spin_lock_bh(&sl->lock); if (!sl->tty) { spin_unlock_bh(&sl->lock); return -ENODEV; } switch (cmd) { case SIOCSKEEPALIVE: /* max for unchar */ if ((unsigned)*p > 255) { spin_unlock_bh(&sl->lock); return -EINVAL; } sl->keepalive = (u8)*p; if (sl->keepalive != 0) { sl->keepalive_timer.expires = jiffies + sl->keepalive * HZ; mod_timer(&sl->keepalive_timer, jiffies + sl->keepalive * HZ); set_bit(SLF_KEEPTEST, &sl->flags); } else del_timer(&sl->keepalive_timer); break; case SIOCGKEEPALIVE: *p = sl->keepalive; break; case SIOCSOUTFILL: if ((unsigned)*p > 255) { /* max for unchar */ spin_unlock_bh(&sl->lock); return -EINVAL; } sl->outfill = (u8)*p; if (sl->outfill != 0) { mod_timer(&sl->outfill_timer, jiffies + sl->outfill * HZ); set_bit(SLF_OUTWAIT, &sl->flags); } else del_timer(&sl->outfill_timer); break; case SIOCGOUTFILL: *p = sl->outfill; break; case SIOCSLEASE: /* Resolve race condition, when ioctl'ing hanged up and opened by another process device. 
*/ if (sl->tty != current->signal->tty && sl->pid != current->pid) { spin_unlock_bh(&sl->lock); return -EPERM; } sl->leased = 0; if (*p) sl->leased = 1; break; case SIOCGLEASE: *p = sl->leased; } spin_unlock_bh(&sl->lock); return 0; } #endif /* VSV changes end */ static struct tty_ldisc_ops sl_ldisc = { .owner = THIS_MODULE, .num = N_SLIP, .name = "slip", .open = slip_open, .close = slip_close, .hangup = slip_hangup, .ioctl = slip_ioctl, .receive_buf = slip_receive_buf, .write_wakeup = slip_write_wakeup, }; static int __init slip_init(void) { int status; if (slip_maxdev < 4) slip_maxdev = 4; /* Sanity */ printk(KERN_INFO "SLIP: version %s (dynamic channels, max=%d)" #ifdef CONFIG_SLIP_MODE_SLIP6 " (6 bit encapsulation enabled)" #endif ".\n", SLIP_VERSION, slip_maxdev); #if defined(SL_INCLUDE_CSLIP) printk(KERN_INFO "CSLIP: code copyright 1989 Regents of the University of California.\n"); #endif #ifdef CONFIG_SLIP_SMART printk(KERN_INFO "SLIP linefill/keepalive option.\n"); #endif slip_devs = kcalloc(slip_maxdev, sizeof(struct net_device *), GFP_KERNEL); if (!slip_devs) return -ENOMEM; /* Fill in our line protocol discipline, and register it */ status = tty_register_ldisc(&sl_ldisc); if (status != 0) { printk(KERN_ERR "SLIP: can't register line discipline (err = %d)\n", status); kfree(slip_devs); } return status; } static void __exit slip_exit(void) { int i; struct net_device *dev; struct slip *sl; unsigned long timeout = jiffies + HZ; int busy = 0; if (slip_devs == NULL) return; /* First of all: check for active disciplines and hangup them. */ do { if (busy) msleep_interruptible(100); busy = 0; for (i = 0; i < slip_maxdev; i++) { dev = slip_devs[i]; if (!dev) continue; sl = netdev_priv(dev); spin_lock_bh(&sl->lock); if (sl->tty) { busy++; tty_hangup(sl->tty); } spin_unlock_bh(&sl->lock); } } while (busy && time_before(jiffies, timeout)); /* FIXME: hangup is async so we should wait when doing this second phase */ for (i = 0; i < slip_maxdev; i++) { dev = slip_devs[i]; if (!dev) continue; slip_devs[i] = NULL; sl = netdev_priv(dev); if (sl->tty) { printk(KERN_ERR "%s: tty discipline still running\n", dev->name); } unregister_netdev(dev); } kfree(slip_devs); slip_devs = NULL; tty_unregister_ldisc(&sl_ldisc); } module_init(slip_init); module_exit(slip_exit); #ifdef CONFIG_SLIP_SMART /* * This is start of the code for multislip style line checking * added by Stanislav Voronyi. All changes before marked VSV */ static void sl_outfill(struct timer_list *t) { struct slip *sl = from_timer(sl, t, outfill_timer); spin_lock(&sl->lock); if (sl->tty == NULL) goto out; if (sl->outfill) { if (test_bit(SLF_OUTWAIT, &sl->flags)) { /* no packets were transmitted, do outfill */ #ifdef CONFIG_SLIP_MODE_SLIP6 unsigned char s = (sl->mode & SL_MODE_SLIP6)?0x70:END; #else unsigned char s = END; #endif /* put END into tty queue. Is it right ??? 
*/ if (!netif_queue_stopped(sl->dev)) { /* if device busy no outfill */ sl->tty->ops->write(sl->tty, &s, 1); } } else set_bit(SLF_OUTWAIT, &sl->flags); mod_timer(&sl->outfill_timer, jiffies+sl->outfill*HZ); } out: spin_unlock(&sl->lock); } static void sl_keepalive(struct timer_list *t) { struct slip *sl = from_timer(sl, t, keepalive_timer); spin_lock(&sl->lock); if (sl->tty == NULL) goto out; if (sl->keepalive) { if (test_bit(SLF_KEEPTEST, &sl->flags)) { /* keepalive still high :(, we must hangup */ if (sl->outfill) /* outfill timer must be deleted too */ (void)del_timer(&sl->outfill_timer); printk(KERN_DEBUG "%s: no packets received during keepalive timeout, hangup.\n", sl->dev->name); /* this must hangup tty & close slip */ tty_hangup(sl->tty); /* I think we need not something else */ goto out; } else set_bit(SLF_KEEPTEST, &sl->flags); mod_timer(&sl->keepalive_timer, jiffies+sl->keepalive*HZ); } out: spin_unlock(&sl->lock); } #endif MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_SLIP); |
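slip_esc() and slip_unesc() above implement classic RFC 1055 framing: each packet is bracketed by END bytes, and any END or ESC byte inside the payload is transmitted as the two-byte sequence ESC ESC_END or ESC ESC_ESC. The stand-alone sketch below reproduces the transmit-side escaping under that assumption, using the RFC 1055 byte values (the driver's actual constants live in slip.h); slip_escape() is an illustrative name, not the driver function:

#include <stdio.h>
#include <stddef.h>

/* RFC 1055 special characters. */
#define END      0xC0	/* frame delimiter */
#define ESC      0xDB	/* escape introducer */
#define ESC_END  0xDC	/* escaped END */
#define ESC_ESC  0xDD	/* escaped ESC */

/* Escape one packet into 'out' the way slip_esc() does: leading and
 * trailing END, payload END/ESC replaced by two-byte sequences.
 * Returns the encoded length; 'out' must hold at least 2*len + 2 bytes. */
static size_t slip_escape(const unsigned char *in, size_t len, unsigned char *out)
{
	unsigned char *p = out;

	*p++ = END;			/* flush line noise on the receiver */
	while (len--) {
		unsigned char c = *in++;

		if (c == END) {
			*p++ = ESC;
			*p++ = ESC_END;
		} else if (c == ESC) {
			*p++ = ESC;
			*p++ = ESC_ESC;
		} else {
			*p++ = c;
		}
	}
	*p++ = END;			/* end of frame */
	return p - out;
}

int main(void)
{
	unsigned char pkt[] = { 0x45, END, 0x01, ESC, 0x02 };
	unsigned char frame[2 * sizeof(pkt) + 2];
	size_t n = slip_escape(pkt, sizeof(pkt), frame);

	for (size_t i = 0; i < n; i++)
		printf("%02x ", frame[i]);
	printf("\n");	/* prints: c0 45 db dc 01 db dd 02 c0 */
	return 0;
}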
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ADDRCONF_H #define _ADDRCONF_H #define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) #define MAX_DESYNC_FACTOR (600) #define ADDR_CHECK_FREQUENCY (120*HZ) #define IPV6_MAX_ADDRESSES 16 #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ?
HZ / 50 : 1) #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) #define ADDRCONF_NOTIFY_PRIORITY 0 #include <linux/in.h> #include <linux/in6.h> struct prefix_info { __u8 type; __u8 length; __u8 prefix_len; union __packed { __u8 flags; struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix <asm/byteorder.h>" #endif }; }; __be32 valid; __be32 prefered; __be32 reserved2; struct in6_addr prefix; }; /* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ static_assert(sizeof(struct prefix_info) == 32); #include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; struct netlink_ext_ack *extack; }; struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; u8 ifa_proto; const struct in6_addr *peer_pfx; u32 rt_priority; u32 ifa_flags; u32 preferred_lft; u32 valid_lft; u16 scope; }; int addrconf_init(void); void addrconf_cleanup(void); int addrconf_add_ifaddr(struct net *net, void __user *arg); int addrconf_del_ifaddr(struct net *net, void __user *arg); int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); #endif int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, unsigned char nsegs); bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict); int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); bool inet_rcv_saddr_any(const struct sock *sk); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags); int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, const struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft); static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr) { memcpy(eui, addr, 3); eui[3] = 0xFF; eui[4] = 0xFE; memcpy(eui + 5, addr + 3, 3); } static inline void addrconf_addr_eui48(u8 *eui, const char *const addr) { addrconf_addr_eui48_base(eui, addr); eui[0] ^= 2; } static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) return -1; /* * The zSeries OSA network 
cards can be shared among various * OS instances, but the OSA cards have only one MAC address. * This leads to duplicate address conflicts in conjunction * with IPv6 if more than one instance uses the same card. * * The driver for these cards can deliver a unique 16-bit * identifier for each instance sharing the same card. It is * placed instead of 0xFFFE in the interface identifier. The * "u" bit of the interface identifier is not inverted in this * case. Hence the resulting interface identifier has local * scope according to RFC2373. */ addrconf_addr_eui48_base(eui, dev->dev_addr); if (dev->dev_id) { eui[3] = (dev->dev_id >> 8) & 0xFF; eui[4] = dev->dev_id & 0xFF; } else { eui[0] ^= 2; } return 0; } static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { if (timeout == 0xffffffff) return ~0UL; /* * Avoid arithmetic overflow. * Assuming unit is constant and non-zero, this "if" statement * will go away on 64bit archs. */ if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit) return LONG_MAX / unit; return timeout; } static inline int addrconf_finite_timeout(unsigned long timeout) { return ~timeout; } /* * IPv6 Address Label subsystem (addrlabel.c) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); /* * multicast prototypes (mcast.c) */ static inline bool ipv6_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) return false; return pskb_may_pull(skb, len); } int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr); int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr); void ipv6_mc_up(struct inet6_dev *idev); void ipv6_mc_down(struct inet6_dev *idev); void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); void ipv6_mc_dad_complete(struct inet6_dev *idev); /* * identify MLD packets for MLD filter exceptions */ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset) { struct icmp6hdr *hdr; if (nexthdr != IPPROTO_ICMPV6 || !pskb_network_may_pull(skb, offset + sizeof(struct icmp6hdr))) return false; hdr = (struct icmp6hdr *)(skb_network_header(skb) + offset); switch (hdr->icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: return true; default: break; } return false; } void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao); /* * anycast prototypes (anycast.c) */ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void 
__ipv6_sock_ac_close(struct sock *sk); void ipv6_sock_ac_close(struct sock *sk); int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); void ipv6_ac_destroy_dev(struct inet6_dev *idev); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); int ipv6_anycast_init(void); void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); int register_inet6addr_validator_notifier(struct notifier_block *nb); int unregister_inet6addr_validator_notifier(struct notifier_block *nb); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf); /** * __in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { return rcu_dereference_rtnl(dev->ip6_ptr); } /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device * @skb: skb for original incoming interface if neeeded * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_stats_get(const struct net_device *dev, const struct sk_buff *skb) { if (netif_is_l3_master(dev)) dev = dev_get_by_index_rcu(dev_net(dev), inet6_iif(skb)); return __in6_dev_get(dev); } /** * __in6_dev_get_safely - get inet6_dev pointer from netdevice * @dev: network device * * This is a safer version of __in6_dev_get */ static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev) { if (likely(dev)) return rcu_dereference_rtnl(dev->ip6_ptr); else return NULL; } /** * in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * This version can be used in any context, and takes a reference * on the inet6_dev. Callers must use in6_dev_put() later to * release this reference. */ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { struct inet6_dev *idev; rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } static inline struct neigh_parms *__in6_dev_nd_parms_get_rcu(const struct net_device *dev) { struct inet6_dev *idev = __in6_dev_get(dev); return idev ? 
idev->nd_parms : NULL; } void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void in6_dev_put_clear(struct inet6_dev **pidev) { struct inet6_dev *idev = *pidev; if (idev) { in6_dev_put(idev); *pidev = NULL; } } static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { refcount_inc(&idev->refcnt); } /* called with rcu_read_lock held */ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); if (unlikely(!idev)) return true; return !!idev->cnf.ignore_routes_with_linkdown; } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { refcount_inc(&ifp->refcnt); } /* * compute link-local solicited-node multicast address */ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, struct in6_addr *solicited) { ipv6_addr_set(solicited, htonl(0xFF020000), 0, htonl(0x1), htonl(0xFF000000) | addr->s6_addr32[3]); } static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; #endif } static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; #endif } static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | ((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) & cpu_to_be64(0xffffffffff000000UL))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x00000001)) | (addr->s6_addr[12] ^ 0xff)) == 0; #endif } static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(0x6a))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; #endif } #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); #endif #endif |
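addrconf_addr_eui48() and addrconf_ifid_eui48() above derive the IPv6 interface identifier from a 48-bit MAC address: 0xFF,0xFE is inserted between the OUI and the device part, and the universal/local bit of the first byte is toggled (except in the zSeries dev_id case described in the comment). Below is a small self-contained sketch of the common case, with eui48_to_ifid() as an illustrative stand-in for the kernel helpers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Build a modified EUI-64 interface identifier from a 48-bit MAC,
 * mirroring addrconf_addr_eui48(): insert FF:FE in the middle and
 * flip the universal/local bit in the first byte. */
static void eui48_to_ifid(uint8_t ifid[8], const uint8_t mac[6])
{
	memcpy(ifid, mac, 3);
	ifid[3] = 0xFF;
	ifid[4] = 0xFE;
	memcpy(ifid + 5, mac + 3, 3);
	ifid[0] ^= 0x02;		/* toggle the u/l bit */
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };
	uint8_t ifid[8];

	eui48_to_ifid(ifid, mac);

	/* Expect 02:1a:2b:ff:fe:3c:4d:5e for the MAC above. */
	for (int i = 0; i < 8; i++)
		printf("%02x%s", ifid[i], i < 7 ? ":" : "\n");
	return 0;
}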
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 
1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 
// SPDX-License-Identifier: GPL-2.0
/*
 * Kernel internal timers
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 *
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 *            "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-12-24 Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *            serialize accesses to xtime/lost_ticks).
 *            Copyright (C) 1998 Andrea Arcangeli
 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
 *            Copyright (C) 2000, 2001, 2002 Ingo Molnar
 *            Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>
#include <linux/sysctl.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * The timer wheel has LVL_DEPTH array levels. Each level provides an array of
 * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
 * level has a different granularity.
 *
 * The level granularity is:		LVL_CLK_DIV ^ lvl
 * The level clock frequency is:	HZ / (LVL_CLK_DIV ^ level)
 *
 * The array level of a newly armed timer depends on the relative expiry
 * time. The farther away the expiry time is, the higher the array level and
 * therefore the coarser the granularity becomes.
 *
 * Contrary to the original timer wheel implementation, which aims for 'exact'
 * expiry of the timers, this implementation removes the need for recascading
 * the timers into the lower array levels. The previous 'classic' timer wheel
 * implementation of the kernel already violated the 'exact' expiry by adding
 * slack to the expiry time to provide batched expiration. The granularity
 * levels provide implicit batching.
 *
 * This is an optimization of the original timer wheel implementation for the
 * majority of the timer wheel use cases: timeouts. The vast majority of
 * timeout timers (networking, disk I/O ...) are canceled before expiry. If
 * the timeout expires it indicates that normal operation is disturbed, so it
 * does not matter much whether the timeout comes with a slight delay.
 *
 * The only exceptions to this are networking timers with a small expiry
 * time.
They rely on the granularity. Those fit into the first wheel level, * which has HZ granularity. * * We don't have cascading anymore. timers with a expiry time above the * capacity of the last wheel level are force expired at the maximum timeout * value of the last wheel level. From data sampling we know that the maximum * value observed is 5 days (network connection tracking), so this should not * be an issue. * * The currently chosen array constants values are a good compromise between * array size and granularity. * * This results in the following granularity and range levels: * * HZ 1000 steps * Level Offset Granularity Range * 0 0 1 ms 0 ms - 63 ms * 1 64 8 ms 64 ms - 511 ms * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s) * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m) * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m) * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h) * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d) * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d) * * HZ 300 * Level Offset Granularity Range * 0 0 3 ms 0 ms - 210 ms * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s) * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s) * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m) * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m) * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h) * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h) * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d) * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d) * * HZ 250 * Level Offset Granularity Range * 0 0 4 ms 0 ms - 255 ms * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m) * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m) * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h) * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d) * * HZ 100 * Level Offset Granularity Range * 0 0 10 ms 0 ms - 630 ms * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s) * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s) * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m) * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m) * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h) * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d) * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d) */ /* Clock divisor for the next level */ #define LVL_CLK_SHIFT 3 #define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT) #define LVL_CLK_MASK (LVL_CLK_DIV - 1) #define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT) #define LVL_GRAN(n) (1UL << LVL_SHIFT(n)) /* * The time start value for each level to select the bucket at enqueue * time. We start from the last possible delta of the previous level * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()). */ #define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) /* Size of each clock level */ #define LVL_BITS 6 #define LVL_SIZE (1UL << LVL_BITS) #define LVL_MASK (LVL_SIZE - 1) #define LVL_OFFS(n) ((n) * LVL_SIZE) /* Level depth */ #if HZ > 100 # define LVL_DEPTH 9 # else # define LVL_DEPTH 8 #endif /* The cutoff (max. 
capacity of the wheel) */ #define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH)) #define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1)) /* * The resulting wheel size. If NOHZ is configured we allocate two * wheels so we have a separate storage for the deferrable timers. */ #define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) #ifdef CONFIG_NO_HZ_COMMON # define NR_BASES 2 # define BASE_STD 0 # define BASE_DEF 1 #else # define NR_BASES 1 # define BASE_STD 0 # define BASE_DEF 0 #endif struct timer_base { raw_spinlock_t lock; struct timer_list *running_timer; #ifdef CONFIG_PREEMPT_RT spinlock_t expiry_lock; atomic_t timer_waiters; #endif unsigned long clk; unsigned long next_expiry; unsigned int cpu; bool next_expiry_recalc; bool is_idle; bool timers_pending; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); #ifdef CONFIG_NO_HZ_COMMON static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); static DEFINE_MUTEX(timer_keys_mutex); static void timer_update_keys(struct work_struct *work); static DECLARE_WORK(timer_update_work, timer_update_keys); #ifdef CONFIG_SMP static unsigned int sysctl_timer_migration = 1; DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); static void timers_update_migration(void) { if (sysctl_timer_migration && tick_nohz_active) static_branch_enable(&timers_migration_enabled); else static_branch_disable(&timers_migration_enabled); } #ifdef CONFIG_SYSCTL static int timer_migration_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&timer_keys_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) timers_update_migration(); mutex_unlock(&timer_keys_mutex); return ret; } static struct ctl_table timer_sysctl[] = { { .procname = "timer_migration", .data = &sysctl_timer_migration, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = timer_migration_handler, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, {} }; static int __init timer_sysctl_init(void) { register_sysctl("kernel", timer_sysctl); return 0; } device_initcall(timer_sysctl_init); #endif /* CONFIG_SYSCTL */ #else /* CONFIG_SMP */ static inline void timers_update_migration(void) { } #endif /* !CONFIG_SMP */ static void timer_update_keys(struct work_struct *work) { mutex_lock(&timer_keys_mutex); timers_update_migration(); static_branch_enable(&timers_nohz_active); mutex_unlock(&timer_keys_mutex); } void timers_update_nohz(void) { schedule_work(&timer_update_work); } static inline bool is_timers_nohz_active(void) { return static_branch_unlikely(&timers_nohz_active); } #else static inline bool is_timers_nohz_active(void) { return false; } #endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, bool force_up) { int rem; unsigned long original = j; /* * We don't want all cpus firing their timers at once hitting the * same lock or cachelines, so we skew each extra cpu with an extra * 3 jiffies. This 3 jiffies came originally from the mm/ code which * already did this. * The skew is done by adding 3*cpunr, then round, then subtract this * extra offset again. */ j += cpu * 3; rem = j % HZ; /* * If the target jiffie is just after a whole second (which can happen * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. 
* But never round down if @force_up is set. */ if (rem < HZ/4 && !force_up) /* round down */ j = j - rem; else /* round up */ j = j - rem + HZ; /* now that we have rounded, subtract the extra skew again */ j -= cpu * 3; /* * Make sure j is still in the future. Otherwise return the * unmodified value. */ return time_is_after_jiffies(j) ? j : original; } /** * __round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * __round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The exact rounding is skewed for each processor to avoid all * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies(unsigned long j, int cpu) { return round_jiffies_common(j, cpu, false); } EXPORT_SYMBOL_GPL(__round_jiffies); /** * __round_jiffies_relative - function to round jiffies to a full second * @j: the time in (relative) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * __round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The exact rounding is skewed for each processor to avoid all * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { unsigned long j0 = jiffies; /* Use j0 because jiffies might change while we run */ return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); /** * round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded * * round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The return value is the rounded version of the @j parameter. 
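 *
 * A minimal usage sketch (illustrative only; the timer name and the five
 * second interval are hypothetical, not taken from this file): a
 * housekeeping timer that should fire roughly every five seconds can be
 * armed with
 *
 *	mod_timer(&housekeeping_timer, round_jiffies(jiffies + 5 * HZ));
 *
 * so that it expires near a full second boundary (skewed per CPU) and its
 * wakeup is batched with other rounded timers.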
*/ unsigned long round_jiffies(unsigned long j) { return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); /** * round_jiffies_relative - function to round jiffies to a full second * @j: the time in (relative) jiffies that should be rounded * * round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The return value is the rounded version of the @j parameter. */ unsigned long round_jiffies_relative(unsigned long j) { return __round_jiffies_relative(j, raw_smp_processor_id()); } EXPORT_SYMBOL_GPL(round_jiffies_relative); /** * __round_jiffies_up - function to round jiffies up to a full second * @j: the time in (absolute) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * This is the same as __round_jiffies() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long __round_jiffies_up(unsigned long j, int cpu) { return round_jiffies_common(j, cpu, true); } EXPORT_SYMBOL_GPL(__round_jiffies_up); /** * __round_jiffies_up_relative - function to round jiffies up to a full second * @j: the time in (relative) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * This is the same as __round_jiffies_relative() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) { unsigned long j0 = jiffies; /* Use j0 because jiffies might change while we run */ return round_jiffies_common(j + j0, cpu, true) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); /** * round_jiffies_up - function to round jiffies up to a full second * @j: the time in (absolute) jiffies that should be rounded * * This is the same as round_jiffies() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long round_jiffies_up(unsigned long j) { return round_jiffies_common(j, raw_smp_processor_id(), true); } EXPORT_SYMBOL_GPL(round_jiffies_up); /** * round_jiffies_up_relative - function to round jiffies up to a full second * @j: the time in (relative) jiffies that should be rounded * * This is the same as round_jiffies_relative() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. 
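 *
 * A minimal usage sketch (illustrative only; @retry_work and @backoff are
 * hypothetical): a retry that must wait at least @backoff jiffies, but may
 * fire a bit later, can be queued as
 *
 *	schedule_delayed_work(&retry_work, round_jiffies_up_relative(backoff));
 *
 * Rounding up preserves the minimum delay while still aligning the wakeup
 * to an (approximately) full second.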
*/ unsigned long round_jiffies_up_relative(unsigned long j) { return __round_jiffies_up_relative(j, raw_smp_processor_id()); } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); static inline unsigned int timer_get_idx(struct timer_list *timer) { return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT; } static inline void timer_set_idx(struct timer_list *timer, unsigned int idx) { timer->flags = (timer->flags & ~TIMER_ARRAYMASK) | idx << TIMER_ARRAYSHIFT; } /* * Helper function to calculate the array index for a given expiry * time. */ static inline unsigned calc_index(unsigned long expires, unsigned lvl, unsigned long *bucket_expiry) { /* * The timer wheel has to guarantee that a timer does not fire * early. Early expiry can happen due to: * - Timer is armed at the edge of a tick * - Truncation of the expiry time in the outer wheel levels * * Round up with level granularity to prevent this. */ expires = (expires >> LVL_SHIFT(lvl)) + 1; *bucket_expiry = expires << LVL_SHIFT(lvl); return LVL_OFFS(lvl) + (expires & LVL_MASK); } static int calc_wheel_index(unsigned long expires, unsigned long clk, unsigned long *bucket_expiry) { unsigned long delta = expires - clk; unsigned int idx; if (delta < LVL_START(1)) { idx = calc_index(expires, 0, bucket_expiry); } else if (delta < LVL_START(2)) { idx = calc_index(expires, 1, bucket_expiry); } else if (delta < LVL_START(3)) { idx = calc_index(expires, 2, bucket_expiry); } else if (delta < LVL_START(4)) { idx = calc_index(expires, 3, bucket_expiry); } else if (delta < LVL_START(5)) { idx = calc_index(expires, 4, bucket_expiry); } else if (delta < LVL_START(6)) { idx = calc_index(expires, 5, bucket_expiry); } else if (delta < LVL_START(7)) { idx = calc_index(expires, 6, bucket_expiry); } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { idx = calc_index(expires, 7, bucket_expiry); } else if ((long) delta < 0) { idx = clk & LVL_MASK; *bucket_expiry = clk; } else { /* * Force expire obscene large timeouts to expire at the * capacity limit of the wheel. */ if (delta >= WHEEL_TIMEOUT_CUTOFF) expires = clk + WHEEL_TIMEOUT_MAX; idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry); } return idx; } static void trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) { if (!is_timers_nohz_active()) return; /* * TODO: This wants some optimizing similar to the code below, but we * will do that when we switch from push to pull for deferrable timers. */ if (timer->flags & TIMER_DEFERRABLE) { if (tick_nohz_full_cpu(base->cpu)) wake_up_nohz_cpu(base->cpu); return; } /* * We might have to IPI the remote CPU if the base is idle and the * timer is not deferrable. If the other CPU is on the way to idle * then it can't set base->is_idle as we hold the base lock: */ if (base->is_idle) wake_up_nohz_cpu(base->cpu); } /* * Enqueue the timer into the hash bucket, mark it pending in * the bitmap, store the index in the timer flags then wake up * the target CPU if needed. */ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, unsigned int idx, unsigned long bucket_expiry) { hlist_add_head(&timer->entry, base->vectors + idx); __set_bit(idx, base->pending_map); timer_set_idx(timer, idx); trace_timer_start(timer, timer->expires, timer->flags); /* * Check whether this is the new first expiring timer. The * effective expiry time of the timer is required here * (bucket_expiry) instead of timer->expires. 
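 *
 * A worked example with made-up numbers (HZ=1000, base->clk == 0,
 * timer->expires == 1000): the delta of 1000 falls into level 2, because
 * LVL_START(2) == 504 <= 1000 < LVL_START(3) == 4032. calc_index() then
 * computes
 *
 *	1000 >> LVL_SHIFT(2) == 15, plus 1 -> 16
 *	bucket_expiry == 16 << LVL_SHIFT(2) == 1024
 *	idx == LVL_OFFS(2) + 16 == 144
 *
 * so the effective expiry is 1024, rounded up by at most one level-2
 * granularity unit (64 jiffies), and 1024 is what gets compared against
 * base->next_expiry below.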
*/ if (time_before(bucket_expiry, base->next_expiry)) { /* * Set the next expiry time and kick the CPU so it * can reevaluate the wheel: */ base->next_expiry = bucket_expiry; base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); } } static void internal_add_timer(struct timer_base *base, struct timer_list *timer) { unsigned long bucket_expiry; unsigned int idx; idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry); enqueue_timer(base, timer, idx, bucket_expiry); } #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static const struct debug_obj_descr timer_debug_descr; struct timer_hint { void (*function)(struct timer_list *t); long offset; }; #define TIMER_HINT(fn, container, timr, hintfn) \ { \ .function = fn, \ .offset = offsetof(container, hintfn) - \ offsetof(container, timr) \ } static const struct timer_hint timer_hints[] = { TIMER_HINT(delayed_work_timer_fn, struct delayed_work, timer, work.func), TIMER_HINT(kthread_delayed_work_timer_fn, struct kthread_delayed_work, timer, work.func), }; static void *timer_debug_hint(void *addr) { struct timer_list *timer = addr; int i; for (i = 0; i < ARRAY_SIZE(timer_hints); i++) { if (timer_hints[i].function == timer->function) { void (**fn)(void) = addr + timer_hints[i].offset; return *fn; } } return timer->function; } static bool timer_is_static_object(void *addr) { struct timer_list *timer = addr; return (timer->entry.pprev == NULL && timer->entry.next == TIMER_ENTRY_STATIC); } /* * fixup_init is called when: * - an active object is initialized */ static bool timer_fixup_init(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: del_timer_sync(timer); debug_object_init(timer, &timer_debug_descr); return true; default: return false; } } /* Stub timer callback for improperly used timers. 
*/ static void stub_timer(struct timer_list *unused) { WARN_ON(1); } /* * fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_NOTAVAILABLE: timer_setup(timer, stub_timer, 0); return true; case ODEBUG_STATE_ACTIVE: WARN_ON(1); fallthrough; default: return false; } } /* * fixup_free is called when: * - an active object is freed */ static bool timer_fixup_free(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: del_timer_sync(timer); debug_object_free(timer, &timer_debug_descr); return true; default: return false; } } /* * fixup_assert_init is called when: * - an untracked/uninit-ed object is found */ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_NOTAVAILABLE: timer_setup(timer, stub_timer, 0); return true; default: return false; } } static const struct debug_obj_descr timer_debug_descr = { .name = "timer_list", .debug_hint = timer_debug_hint, .is_static_object = timer_is_static_object, .fixup_init = timer_fixup_init, .fixup_activate = timer_fixup_activate, .fixup_free = timer_fixup_free, .fixup_assert_init = timer_fixup_assert_init, }; static inline void debug_timer_init(struct timer_list *timer) { debug_object_init(timer, &timer_debug_descr); } static inline void debug_timer_activate(struct timer_list *timer) { debug_object_activate(timer, &timer_debug_descr); } static inline void debug_timer_deactivate(struct timer_list *timer) { debug_object_deactivate(timer, &timer_debug_descr); } static inline void debug_timer_assert_init(struct timer_list *timer) { debug_object_assert_init(timer, &timer_debug_descr); } static void do_init_timer(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { debug_object_init_on_stack(timer, &timer_debug_descr); do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL_GPL(init_timer_on_stack_key); void destroy_timer_on_stack(struct timer_list *timer) { debug_object_free(timer, &timer_debug_descr); } EXPORT_SYMBOL_GPL(destroy_timer_on_stack); #else static inline void debug_timer_init(struct timer_list *timer) { } static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } static inline void debug_timer_assert_init(struct timer_list *timer) { } #endif static inline void debug_init(struct timer_list *timer) { debug_timer_init(timer); trace_timer_init(timer); } static inline void debug_deactivate(struct timer_list *timer) { debug_timer_deactivate(timer); trace_timer_cancel(timer); } static inline void debug_assert_init(struct timer_list *timer) { debug_timer_assert_init(timer); } static void do_init_timer(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { timer->entry.pprev = NULL; timer->function = func; if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS)) flags &= TIMER_INIT_FLAGS; timer->flags = flags | raw_smp_processor_id(); lockdep_init_map(&timer->lockdep_map, name, key, 0); } /** * init_timer_key - initialize a 
timer * @timer: the timer to be initialized * @func: timer callback function * @flags: timer flags * @name: name of the timer * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies * * init_timer_key() must be done to a timer prior calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { debug_init(timer); do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL(init_timer_key); static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct hlist_node *entry = &timer->entry; debug_deactivate(timer); __hlist_del(entry); if (clear_pending) entry->pprev = NULL; entry->next = LIST_POISON2; } static int detach_if_pending(struct timer_list *timer, struct timer_base *base, bool clear_pending) { unsigned idx = timer_get_idx(timer); if (!timer_pending(timer)) return 0; if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) { __clear_bit(idx, base->pending_map); base->next_expiry_recalc = true; } detach_timer(timer, clear_pending); return 1; } static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) { struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* * If the timer is deferrable and NO_HZ_COMMON is set then we need * to use the deferrable base. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* * If the timer is deferrable and NO_HZ_COMMON is set then we need * to use the deferrable base. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } static inline struct timer_base *get_timer_base(u32 tflags) { return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } static inline struct timer_base * get_target_base(struct timer_base *base, unsigned tflags) { #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !(tflags & TIMER_PINNED)) return get_timer_cpu_base(tflags, get_nohz_timer_target()); #endif return get_timer_this_cpu_base(tflags); } static inline void forward_timer_base(struct timer_base *base) { unsigned long jnow = READ_ONCE(jiffies); /* * No need to forward if we are close enough below jiffies. * Also while executing timers, base->clk is 1 offset ahead * of jiffies to avoid endless requeuing to current jiffies. */ if ((long)(jnow - base->clk) < 1) return; /* * If the next expiry value is > jiffies, then we fast forward to * jiffies otherwise we forward to the next expiry value. */ if (time_after(base->next_expiry, jnow)) { base->clk = jnow; } else { if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk))) return; base->clk = base->next_expiry; } } /* * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means * that all timers which are tied to this base are locked, and the base itself * is locked too. * * So __run_timers/migrate_timers can safely modify all timers which could * be found in the base->vectors array. * * When a timer is migrating then the TIMER_MIGRATING flag is set and we need * to wait until the migration is done. 
*/ static struct timer_base *lock_timer_base(struct timer_list *timer, unsigned long *flags) __acquires(timer->base->lock) { for (;;) { struct timer_base *base; u32 tf; /* * We need to use READ_ONCE() here, otherwise the compiler * might re-read @tf between the check for TIMER_MIGRATING * and spin_lock(). */ tf = READ_ONCE(timer->flags); if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); raw_spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; raw_spin_unlock_irqrestore(&base->lock, *flags); } cpu_relax(); } } #define MOD_TIMER_PENDING_ONLY 0x01 #define MOD_TIMER_REDUCE 0x02 #define MOD_TIMER_NOTPENDING 0x04 static inline int __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) { unsigned long clk = 0, flags, bucket_expiry; struct timer_base *base, *new_base; unsigned int idx = UINT_MAX; int ret = 0; debug_assert_init(timer); /* * This is a common optimization triggered by the networking code - if * the timer is re-modified to have the same timeout or ends up in the * same array bucket then just return: */ if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { /* * The downside of this optimization is that it can result in * larger granularity than you would get from adding a new * timer with this expiry. */ long diff = timer->expires - expires; if (!diff) return 1; if (options & MOD_TIMER_REDUCE && diff <= 0) return 1; /* * We lock timer base and calculate the bucket index right * here. If the timer ends up in the same bucket, then we * just update the expiry time and avoid the whole * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated * while holding base lock to prevent a race against the * shutdown code. */ if (!timer->function) goto out_unlock; forward_timer_base(base); if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && time_before_eq(timer->expires, expires)) { ret = 1; goto out_unlock; } clk = base->clk; idx = calc_wheel_index(expires, clk, &bucket_expiry); /* * Retrieve and compare the array index of the pending * timer. If it matches set the expiry to the new value so a * subsequent call will exit in the expires check above. */ if (idx == timer_get_idx(timer)) { if (!(options & MOD_TIMER_REDUCE)) timer->expires = expires; else if (time_after(timer->expires, expires)) timer->expires = expires; ret = 1; goto out_unlock; } } else { base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated * while holding base lock to prevent a race against the * shutdown code. */ if (!timer->function) goto out_unlock; forward_timer_base(base); } ret = detach_if_pending(timer, base, false); if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; new_base = get_target_base(base, timer->flags); if (base != new_base) { /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. 
*/ if (likely(base->running_timer != timer)) { /* See the comment in lock_timer_base() */ timer->flags |= TIMER_MIGRATING; raw_spin_unlock(&base->lock); base = new_base; raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); forward_timer_base(base); } } debug_timer_activate(timer); timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance * between calculating 'idx' and possibly switching the base, only * enqueue_timer() is required. Otherwise we need to (re)calculate * the wheel index via internal_add_timer(). */ if (idx != UINT_MAX && clk == base->clk) enqueue_timer(base, timer, idx, bucket_expiry); else internal_add_timer(base, timer); out_unlock: raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } /** * mod_timer_pending - Modify a pending timer's timeout * @timer: The pending timer to be modified * @expires: New absolute timeout in jiffies * * mod_timer_pending() is the same for pending timers as mod_timer(), but * will not activate inactive timers. * * If @timer->function == NULL then the start operation is silently * discarded. * * Return: * * %0 - The timer was inactive and not modified or was in * shutdown state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); } EXPORT_SYMBOL(mod_timer_pending); /** * mod_timer - Modify a timer's timeout * @timer: The timer to be modified * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * * mod_timer() is more efficient than the above open coded sequence. In * case that the timer is inactive, the del_timer() part is a NOP. The * timer is in any case activated with the new expiry time @expires. * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. * * If @timer->function == NULL then the start operation is silently * discarded. In this case the return value is 0 and meaningless. * * Return: * * %0 - The timer was inactive and started or was in shutdown * state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires or * the timer was active and not modified because @expires did * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, 0); } EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only * modify an enqueued timer if that would reduce the expiration time. If * @timer is not enqueued it starts the timer. * * If @timer->function == NULL then the start operation is silently * discarded. 
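 *
 * A minimal usage sketch (the structure and its members are hypothetical):
 * when several submitters each require "expire no later than my deadline",
 * each of them can simply call
 *
 *	timer_reduce(&foo->flush_timer, jiffies + foo->max_latency);
 *
 * and the earliest deadline wins; for an already pending timer the expiry
 * can only move earlier, never later.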
* * Return: * * %0 - The timer was inactive and started or was in shutdown * state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires or * the timer was active and not modified because @expires * did not change the effective expiry time such that the * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, MOD_TIMER_REDUCE); } EXPORT_SYMBOL(timer_reduce); /** * add_timer - Start a timer * @timer: The timer to be started * * Start @timer to expire at @timer->expires in the future. @timer->expires * is the absolute expiry time measured in 'jiffies'. When the timer expires * timer->function(timer) will be invoked from soft interrupt context. * * The @timer->expires and @timer->function fields must be set prior * to calling this function. * * If @timer->function == NULL then the start operation is silently * discarded. * * If @timer->expires is already in the past @timer will be queued to * expire at the next timer tick. * * This can only operate on an inactive timer. Attempts to invoke this on * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { if (WARN_ON_ONCE(timer_pending(timer))) return; __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); } EXPORT_SYMBOL(add_timer); /** * add_timer_on - Start a timer on a particular CPU * @timer: The timer to be started * @cpu: The CPU to start it on * * Same as add_timer() except that it starts the timer on the given CPU. * * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { struct timer_base *new_base, *base; unsigned long flags; debug_assert_init(timer); if (WARN_ON_ONCE(timer_pending(timer))) return; new_base = get_timer_cpu_base(timer->flags, cpu); /* * If @timer was on a different CPU, it should be migrated with the * old base locked to prevent other operations proceeding with the * wrong base locked. See lock_timer_base(). */ base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated while * holding base lock to prevent a race against the shutdown code. */ if (!timer->function) goto out_unlock; if (base != new_base) { timer->flags |= TIMER_MIGRATING; raw_spin_unlock(&base->lock); base = new_base; raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } forward_timer_base(base); debug_timer_activate(timer); internal_add_timer(base, timer); out_unlock: raw_spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); /** * __timer_delete - Internal function: Deactivate a timer * @timer: The timer to be deactivated * @shutdown: If true, this indicates that the timer is about to be * shutdown permanently. * * If @shutdown is true then @timer->function is set to NULL under the * timer base lock which prevents further rearming of the time. In that * case any attempt to rearm @timer after this function returns will be * silently ignored. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static int __timer_delete(struct timer_list *timer, bool shutdown) { struct timer_base *base; unsigned long flags; int ret = 0; debug_assert_init(timer); /* * If @shutdown is set then the lock has to be taken whether the * timer is pending or not to protect against a concurrent rearm * which might hit between the lockless pending check and the lock * aquisition. 
By taking the lock it is ensured that such a newly * enqueued timer is dequeued and cannot end up with * timer->function == NULL in the expiry code. * * If timer->function is currently executed, then this makes sure * that the callback cannot requeue the timer. */ if (timer_pending(timer) || shutdown) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); } return ret; } /** * timer_delete - Deactivate a timer * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to * timer_delete_sync() it does not take into account whether the timer's * callback function is concurrently executed on a different CPU or not. * It neither prevents rearming of the timer. If @timer can be rearmed * concurrently then the return value of this function is meaningless. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ int timer_delete(struct timer_list *timer) { return __timer_delete(timer, false); } EXPORT_SYMBOL(timer_delete); /** * timer_shutdown - Deactivate a timer and prevent rearming * @timer: The timer to be deactivated * * The function does not wait for an eventually running timer callback on a * different CPU but it prevents rearming of the timer. Any attempt to arm * @timer after this function returns will be silently ignored. * * This function is useful for teardown code and should only be used when * timer_shutdown_sync() cannot be invoked due to locking or context constraints. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending */ int timer_shutdown(struct timer_list *timer) { return __timer_delete(timer, true); } EXPORT_SYMBOL_GPL(timer_shutdown); /** * __try_to_del_timer_sync - Internal function: Try to deactivate a timer * @timer: Timer to deactivate * @shutdown: If true, this indicates that the timer is about to be * shutdown permanently. * * If @shutdown is true then @timer->function is set to NULL under the * timer base lock which prevents further rearming of the timer. Any * attempt to rearm @timer after this function returns will be silently * ignored. * * This function cannot guarantee that the timer cannot be rearmed * right after dropping the base lock if @shutdown is false. That * needs to be prevented by the calling code if necessary. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated * * %-1 - The timer callback function is running on a different CPU */ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown) { struct timer_base *base; unsigned long flags; int ret = -1; debug_assert_init(timer); base = lock_timer_base(timer, &flags); if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: Timer to deactivate * * This function tries to deactivate a timer. On success the timer is not * queued and the timer callback function is not running on any CPU. * * This function does not guarantee that the timer cannot be rearmed right * after dropping the base lock. That needs to be prevented by the calling * code if necessary. 
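 *
 * A minimal sketch of a typical caller (struct foo, its timer and its lock
 * are hypothetical): if a lock which the timer callback also takes is held,
 * the blocking timer_delete_sync() could deadlock, so the non-blocking
 * variant is retried with the lock temporarily dropped:
 *
 *	while (try_to_del_timer_sync(&foo->timer) < 0) {
 *		spin_unlock_bh(&foo->lock);
 *		cpu_relax();
 *		spin_lock_bh(&foo->lock);
 *	}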
* * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { return __try_to_del_timer_sync(timer, false); } EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_PREEMPT_RT static __init void timer_base_init_expiry_lock(struct timer_base *base) { spin_lock_init(&base->expiry_lock); } static inline void timer_base_lock_expiry(struct timer_base *base) { spin_lock(&base->expiry_lock); } static inline void timer_base_unlock_expiry(struct timer_base *base) { spin_unlock(&base->expiry_lock); } /* * The counterpart to del_timer_wait_running(). * * If there is a waiter for base->expiry_lock, then it was waiting for the * timer callback to finish. Drop expiry_lock and reacquire it. That allows * the waiter to acquire the lock and make progress. */ static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { raw_spin_unlock_irq(&base->lock); spin_unlock(&base->expiry_lock); spin_lock(&base->expiry_lock); raw_spin_lock_irq(&base->lock); } } /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was * running. * * This prevents priority inversion, if the softirq thread on a remote CPU * got preempted, and it prevents a life lock when the task which tries to * delete a timer preempted the softirq thread running the timer callback * function. */ static void del_timer_wait_running(struct timer_list *timer) { u32 tf; tf = READ_ONCE(timer->flags); if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) { struct timer_base *base = get_timer_base(tf); /* * Mark the base as contended and grab the expiry lock, * which is held by the softirq across the timer * callback. Drop the lock immediately so the softirq can * expire the next timer. In theory the timer could already * be running again, but that's more than unlikely and just * causes another wait loop. */ atomic_inc(&base->timer_waiters); spin_lock_bh(&base->expiry_lock); atomic_dec(&base->timer_waiters); spin_unlock_bh(&base->expiry_lock); } } #else static inline void timer_base_init_expiry_lock(struct timer_base *base) { } static inline void timer_base_lock_expiry(struct timer_base *base) { } static inline void timer_base_unlock_expiry(struct timer_base *base) { } static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif /** * __timer_delete_sync - Internal function: Deactivate a timer and wait * for the handler to finish. * @timer: The timer to be deactivated * @shutdown: If true, @timer->function will be set to NULL under the * timer base lock which prevents rearming of @timer * * If @shutdown is not set the timer can be rearmed later. If the timer can * be rearmed concurrently, i.e. after dropping the base lock then the * return value is meaningless. * * If @shutdown is set then @timer->function is set to NULL under timer * base lock which prevents rearming of the timer. Any attempt to rearm * a shutdown timer is silently ignored. * * If the timer should be reused after shutdown it has to be initialized * again. 
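 *
 * Illustrative re-use after a shutdown (the names are hypothetical):
 *
 *	timer_shutdown_sync(&foo->timer);
 *	...
 *	timer_setup(&foo->timer, foo_timer_fn, 0);
 *	mod_timer(&foo->timer, jiffies + HZ);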
* * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static int __timer_delete_sync(struct timer_list *timer, bool shutdown) { int ret; #ifdef CONFIG_LOCKDEP unsigned long flags; /* * If lockdep gives a backtrace here, please reference * the synchronization rules above. */ local_irq_save(flags); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); local_irq_restore(flags); #endif /* * don't use it in hardirq context, because it * could lead to deadlock. */ WARN_ON(in_hardirq() && !(timer->flags & TIMER_IRQSAFE)); /* * Must be able to sleep on PREEMPT_RT because of the slowpath in * del_timer_wait_running(). */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE)) lockdep_assert_preemption_enabled(); do { ret = __try_to_del_timer_sync(timer, shutdown); if (unlikely(ret < 0)) { del_timer_wait_running(timer); cpu_relax(); } } while (ret < 0); return ret; } /** * timer_delete_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must * not hold locks which would prevent completion of the timer's callback * function. The timer's handler must not call add_timer_on(). Upon exit * the timer is not queued and the handler is not running on any CPU. * * For !irqsafe timers, the caller must not hold locks that are held in * interrupt context. Even if the lock has nothing to do with the timer in * question. Here's why:: * * CPU0 CPU1 * ---- ---- * <SOFTIRQ> * call_timer_fn(); * base->running_timer = mytimer; * spin_lock_irq(somelock); * <IRQ> * spin_lock(somelock); * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * * Now timer_delete_sync() will never return and never release somelock. * The interrupt on the other CPU is waiting to grab somelock but it has * interrupted the softirq that CPU0 is waiting to finish. * * This function cannot guarantee that the timer is not rearmed again by * some concurrent or preempting code, right after it dropped the base * lock. If there is the possibility of a concurrent rearm then the return * value of the function is meaningless. * * If such a guarantee is needed, e.g. for teardown situations then use * timer_shutdown_sync() instead. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ int timer_delete_sync(struct timer_list *timer) { return __timer_delete_sync(timer, false); } EXPORT_SYMBOL(timer_delete_sync); /** * timer_shutdown_sync - Shutdown a timer and prevent rearming * @timer: The timer to be shutdown * * When the function returns it is guaranteed that: * - @timer is not queued * - The callback function of @timer is not running * - @timer cannot be enqueued again. Any attempt to rearm * @timer is silently ignored. * * See timer_delete_sync() for synchronization rules. * * This function is useful for final teardown of an infrastructure where * the timer is subject to a circular dependency problem. * * A common pattern for this is a timer and a workqueue where the timer can * schedule work and work can arm the timer. On shutdown the workqueue must * be destroyed and the timer must be prevented from rearming. Unless the * code has conditionals like 'if (mything->in_shutdown)' to prevent that * there is no way to get this correct with timer_delete_sync(). 
* * timer_shutdown_sync() is solving the problem. The correct ordering of * calls in this case is: * * timer_shutdown_sync(&mything->timer); * workqueue_destroy(&mything->workqueue); * * After this 'mything' can be safely freed. * * This obviously implies that the timer is not required to be functional * for the rest of the shutdown operation. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending */ int timer_shutdown_sync(struct timer_list *timer) { return __timer_delete_sync(timer, true); } EXPORT_SYMBOL_GPL(timer_shutdown_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), unsigned long baseclk) { int count = preempt_count(); #ifdef CONFIG_LOCKDEP /* * It is permissible to free the timer from inside the * function that is called from it, this we need to take into * account for lockdep too. To avoid bogus "held lock freed" * warnings as well as problems when looking into * timer->lockdep_map, make a copy and use that here. */ struct lockdep_map lockdep_map; lockdep_copy_map(&lockdep_map, &timer->lockdep_map); #endif /* * Couple the lock chain with the lock chain at * timer_delete_sync() by acquiring the lock_map around the fn() * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); trace_timer_expire_entry(timer, baseclk); fn(timer); trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); if (count != preempt_count()) { WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", fn, count, preempt_count()); /* * Restore the preempt count. That gives us a decent * chance to survive and extract information. If the * callback kept a lock held, bad luck, but not worse * than the BUG() we had. */ preempt_count_set(count); } } static void expire_timers(struct timer_base *base, struct hlist_head *head) { /* * This value is required only for tracing. base->clk was * incremented directly before expire_timers was called. But expiry * is related to the old base->clk value. */ unsigned long baseclk = base->clk - 1; while (!hlist_empty(head)) { struct timer_list *timer; void (*fn)(struct timer_list *); timer = hlist_entry(head->first, struct timer_list, entry); base->running_timer = timer; detach_timer(timer, true); fn = timer->function; if (WARN_ON_ONCE(!fn)) { /* Should never happen. Emphasis on should! */ base->running_timer = NULL; continue; } if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); raw_spin_lock(&base->lock); base->running_timer = NULL; } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, baseclk); raw_spin_lock_irq(&base->lock); base->running_timer = NULL; timer_sync_wait_running(base); } } } static int collect_expired_timers(struct timer_base *base, struct hlist_head *heads) { unsigned long clk = base->clk = base->next_expiry; struct hlist_head *vec; int i, levels = 0; unsigned int idx; for (i = 0; i < LVL_DEPTH; i++) { idx = (clk & LVL_MASK) + i * LVL_SIZE; if (__test_and_clear_bit(idx, base->pending_map)) { vec = base->vectors + idx; hlist_move_list(vec, heads++); levels++; } /* Is it time to look at the next level? */ if (clk & LVL_CLK_MASK) break; /* Shift clock for the next level granularity */ clk >>= LVL_CLK_SHIFT; } return levels; } /* * Find the next pending bucket of a level. Search from level start (@offset) * + @clk upwards and if nothing there, search from start of the level * (@offset) up to @offset + clk. 
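 *
 * Worked example with made-up numbers: @offset == 0, @clk == 60 and only
 * bucket 2 of this level pending. The first search scans bits 60..63 and
 * finds nothing; the second scans bits 0..59 and finds bit 2, so the
 * function returns 2 + LVL_SIZE - 60 == 6: the next pending bucket of this
 * level is six level-clock ticks ahead of @clk.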
*/ static int next_pending_bucket(struct timer_base *base, unsigned offset, unsigned clk) { unsigned pos, start = offset + clk; unsigned end = offset + LVL_SIZE; pos = find_next_bit(base->pending_map, end, start); if (pos < end) return pos - start; pos = find_next_bit(base->pending_map, start, offset); return pos < start ? pos + LVL_SIZE - start : -1; } /* * Search the first expiring timer in the various clock levels. Caller must * hold base->lock. */ static unsigned long __next_timer_interrupt(struct timer_base *base) { unsigned long clk, next, adj; unsigned lvl, offset = 0; next = base->clk + NEXT_TIMER_MAX_DELTA; clk = base->clk; for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { int pos = next_pending_bucket(base, offset, clk & LVL_MASK); unsigned long lvl_clk = clk & LVL_CLK_MASK; if (pos >= 0) { unsigned long tmp = clk + (unsigned long) pos; tmp <<= LVL_SHIFT(lvl); if (time_before(tmp, next)) next = tmp; /* * If the next expiration happens before we reach * the next level, no need to check further. */ if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK)) break; } /* * Clock for the next level. If the current level clock lower * bits are zero, we look at the next level as is. If not we * need to advance it by one because that's going to be the * next expiring bucket in that level. base->clk is the next * expiring jiffie. So in case of: * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 0 0 * * we have to look at all levels @index 0. With * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 0 2 * * LVL0 has the next expiring bucket @index 2. The upper * levels have the next expiring bucket @index 1. * * In case that the propagation wraps the next level the same * rules apply: * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 F 2 * * So after looking at LVL0 we get: * * LVL5 LVL4 LVL3 LVL2 LVL1 * 0 0 0 1 0 * * So no propagation from LVL1 to LVL2 because that happened * with the add already, but then we need to propagate further * from LVL2 to LVL3. * * So the simple check whether the lower bits of the current * level are 0 or not is sufficient for all cases. */ adj = lvl_clk ? 1 : 0; clk >>= LVL_CLK_SHIFT; clk += adj; } base->next_expiry_recalc = false; base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); return next; } #ifdef CONFIG_NO_HZ_COMMON /* * Check, if the next hrtimer event is before the next timer wheel * event: */ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) { u64 nextevt = hrtimer_get_next_event(); /* * If high resolution timers are enabled * hrtimer_get_next_event() returns KTIME_MAX. */ if (expires <= nextevt) return expires; /* * If the next timer is already expired, return the tick base * time so the tick is fired immediately. */ if (nextevt <= basem) return basem; /* * Round up to the next jiffie. High resolution timers are * off, so the hrtimers are expired in the tick and we need to * make sure that this tick really expires the timer to avoid * a ping pong of the nohz stop code. * * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 */ return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } /** * get_next_timer_interrupt - return the time (clock mono) of the next timer * @basej: base time jiffies * @basem: base time clock monotonic * * Returns the tick aligned clock monotonic time of the next pending * timer or KTIME_MAX if no timer is pending. 
*/ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); u64 expires = KTIME_MAX; unsigned long nextevt; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ if (cpu_is_offline(smp_processor_id())) return expires; raw_spin_lock(&base->lock); if (base->next_expiry_recalc) base->next_expiry = __next_timer_interrupt(base); nextevt = base->next_expiry; /* * We have a fresh next event. Check whether we can forward the * base. We can only do that when @basej is past base->clk * otherwise we might rewind base->clk. */ if (time_after(basej, base->clk)) { if (time_after(nextevt, basej)) base->clk = basej; else if (time_after(nextevt, base->clk)) base->clk = nextevt; } if (time_before_eq(nextevt, basej)) { expires = basem; base->is_idle = false; } else { if (base->timers_pending) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle. * Also the tick is stopped so any added timer must forward * the base clk itself to keep granularity small. This idle * logic is only maintained for the BASE_STD base, deferrable * timers may still see large granularity skew (by design). */ if ((expires - basem) > TICK_NSEC) base->is_idle = true; } raw_spin_unlock(&base->lock); return cmp_next_hrtimer_event(basem, expires); } /** * timer_clear_idle - Clear the idle state of the timer base * * Called with interrupts disabled */ void timer_clear_idle(void) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* * We do this unlocked. The worst outcome is a remote enqueue sending * a pointless IPI, but taking the lock would just make the window for * sending the IPI a few instructions smaller for the cost of taking * the lock in the exit from idle path. */ base->is_idle = false; } #endif /** * __run_timers - run all expired timers (if any) on this CPU. * @base: the timer vector to be processed. */ static inline void __run_timers(struct timer_base *base) { struct hlist_head heads[LVL_DEPTH]; int levels; if (time_before(jiffies, base->next_expiry)) return; timer_base_lock_expiry(base); raw_spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->clk) && time_after_eq(jiffies, base->next_expiry)) { levels = collect_expired_timers(base, heads); /* * The two possible reasons for not finding any expired * timer at this clk are that all matching timers have been * dequeued or no timer has been queued since * base::next_expiry was set to base::clk + * NEXT_TIMER_MAX_DELTA. */ WARN_ON_ONCE(!levels && !base->next_expiry_recalc && base->timers_pending); base->clk++; base->next_expiry = __next_timer_interrupt(base); while (levels--) expire_timers(base, heads + levels); } raw_spin_unlock_irq(&base->lock); timer_base_unlock_expiry(base); } /* * This function runs timers and the timer-tq in bottom half context. */ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } /* * Called by the local, per-CPU timer interrupt on SMP. */ static void run_local_timers(void) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); hrtimer_run_queues(); /* Raise the softirq only if required. 
*/ if (time_before(jiffies, base->next_expiry)) { if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base. */ base++; if (time_before(jiffies, base->next_expiry)) return; } raise_softirq(TIMER_SOFTIRQ); } /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. */ void update_process_times(int user_tick) { struct task_struct *p = current; /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); rcu_sched_clock_irq(user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_tick(); #endif scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); } /* * Since schedule_timeout()'s timer is defined on the stack, it must store * the target task on the stack as well. */ struct process_timer { struct timer_list timer; struct task_struct *task; }; static void process_timeout(struct timer_list *t) { struct process_timer *timeout = from_timer(timeout, t, timer); wake_up_process(timeout->task); } /** * schedule_timeout - sleep until timeout * @timeout: timeout value in jiffies * * Make the current task sleep until @timeout jiffies have elapsed. * The function's behavior depends on the current task state * (see also set_current_state() description): * * %TASK_RUNNING - the scheduler is called, but the task does not sleep * at all. That happens because sched_submit_work() does nothing for * tasks in %TASK_RUNNING state. * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to * pass before the routine returns unless the current task is explicitly * woken up (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be %TASK_RUNNING when this * routine returns. * * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * * Returns 0 when the timer has expired, otherwise the remaining time in * jiffies will be returned. In all cases the return value is guaranteed * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { struct process_timer timer; unsigned long expire; switch (timeout) { case MAX_SCHEDULE_TIMEOUT: /* * These two special cases exist purely for the caller's * convenience. Nothing more. We could take * MAX_SCHEDULE_TIMEOUT from one of the negative values, * but I'd like to return a valid offset (>=0) to allow * the caller to do whatever it wants with the retval. */ schedule(); goto out; default: /* * Another bit of paranoia. Note that the retval will be * 0 since no piece of the kernel is supposed to check * for a negative retval of schedule_timeout() (since it * should never happen anyway). You just have the printk() * that will tell you if something has gone wrong and where. 
*/ if (timeout < 0) { printk(KERN_ERR "schedule_timeout: wrong timeout " "value %lx\n", timeout); dump_stack(); __set_current_state(TASK_RUNNING); goto out; } } expire = timeout + jiffies; timer.task = current; timer_setup_on_stack(&timer.timer, process_timeout, 0); __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); schedule(); del_timer_sync(&timer.timer); /* Remove the timer from the object tracker */ destroy_timer_on_stack(&timer.timer); timeout = expire - jiffies; out: return timeout < 0 ? 0 : timeout; } EXPORT_SYMBOL(schedule_timeout); /* * We can use __set_current_state() here because schedule_timeout() calls * schedule() unconditionally. */ signed long __sched schedule_timeout_interruptible(signed long timeout) { __set_current_state(TASK_INTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_interruptible); signed long __sched schedule_timeout_killable(signed long timeout) { __set_current_state(TASK_KILLABLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_killable); signed long __sched schedule_timeout_uninterruptible(signed long timeout) { __set_current_state(TASK_UNINTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_uninterruptible); /* * Like schedule_timeout_uninterruptible(), except this task will not contribute * to load average. */ signed long __sched schedule_timeout_idle(signed long timeout) { __set_current_state(TASK_IDLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_idle); #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) { struct timer_list *timer; int cpu = new_base->cpu; while (!hlist_empty(head)) { timer = hlist_entry(head->first, struct timer_list, entry); detach_timer(timer, false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } int timers_prepare_cpu(unsigned int cpu) { struct timer_base *base; int b; for (b = 0; b < NR_BASES; b++) { base = per_cpu_ptr(&timer_bases[b], cpu); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; base->next_expiry_recalc = false; base->timers_pending = false; base->is_idle = false; } return 0; } int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; struct timer_base *new_base; int b, i; for (b = 0; b < NR_BASES; b++) { old_base = per_cpu_ptr(&timer_bases[b], cpu); new_base = get_cpu_ptr(&timer_bases[b]); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ raw_spin_lock_irq(&new_base->lock); raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); /* * The current CPUs base clock might be stale. Update it * before moving the timers over. 
*/ forward_timer_base(new_base); WARN_ON_ONCE(old_base->running_timer); old_base->running_timer = NULL; for (i = 0; i < WHEEL_SIZE; i++) migrate_timer_list(new_base, old_base->vectors + i); raw_spin_unlock(&old_base->lock); raw_spin_unlock_irq(&new_base->lock); put_cpu_ptr(&timer_bases); } return 0; } #endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(int cpu) { struct timer_base *base; int i; for (i = 0; i < NR_BASES; i++) { base = per_cpu_ptr(&timer_bases[i], cpu); base->cpu = cpu; raw_spin_lock_init(&base->lock); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; timer_base_init_expiry_lock(base); } } static void __init init_timer_cpus(void) { int cpu; for_each_possible_cpu(cpu) init_timer_cpu(cpu); } void __init init_timers(void) { init_timer_cpus(); posix_cputimers_init_work(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } /** * msleep - sleep safely even with waitqueue interruptions * @msecs: Time in milliseconds to sleep for */ void msleep(unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; while (timeout) timeout = schedule_timeout_uninterruptible(timeout); } EXPORT_SYMBOL(msleep); /** * msleep_interruptible - sleep waiting for signals * @msecs: Time in milliseconds to sleep for */ unsigned long msleep_interruptible(unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; while (timeout && !signal_pending(current)) timeout = schedule_timeout_interruptible(timeout); return jiffies_to_msecs(timeout); } EXPORT_SYMBOL(msleep_interruptible); /** * usleep_range_state - Sleep for an approximate time in a given state * @min: Minimum time in usecs to sleep * @max: Maximum time in usecs to sleep * @state: State the current task will be in while sleeping * * In non-atomic context where the exact wakeup time is flexible, use * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } EXPORT_SYMBOL(usleep_range_state);
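The timer_shutdown_sync() kernel-doc above describes the timer/workqueue teardown ordering only in prose. What follows is a minimal, illustrative sketch of that pattern; it is not part of timer.c, the struct mything object and all function names are hypothetical, and the workqueue half of the ordering uses destroy_workqueue(), the actual workqueue API (spelled workqueue_destroy() in the kernel-doc).

/*
 * Illustrative sketch (not part of timer.c): the teardown ordering described
 * in the timer_shutdown_sync() kernel-doc, for a hypothetical object whose
 * timer and workqueue can re-arm each other.
 */
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

struct mything {
	struct timer_list timer;
	struct work_struct work;
	struct workqueue_struct *workqueue;
};

static void mything_work(struct work_struct *work)
{
	struct mything *thing = container_of(work, struct mything, work);

	/* Re-arm the timer; after timer_shutdown_sync() this becomes a no-op. */
	mod_timer(&thing->timer, jiffies + HZ);
}

static void mything_timer(struct timer_list *t)
{
	struct mything *thing = from_timer(thing, t, timer);

	queue_work(thing->workqueue, &thing->work);
}

static struct mything *mything_create(void)
{
	struct mything *thing = kzalloc(sizeof(*thing), GFP_KERNEL);

	if (!thing)
		return NULL;

	thing->workqueue = alloc_workqueue("mything", 0, 0);
	if (!thing->workqueue) {
		kfree(thing);
		return NULL;
	}
	INIT_WORK(&thing->work, mything_work);
	timer_setup(&thing->timer, mything_timer, 0);
	mod_timer(&thing->timer, jiffies + HZ);
	return thing;
}

static void mything_destroy(struct mything *thing)
{
	/*
	 * Shut the timer down first: afterwards it is guaranteed not to be
	 * queued or running, and any rearm attempt from the work item is
	 * silently ignored. Only then tear down the workqueue and free.
	 */
	timer_shutdown_sync(&thing->timer);
	destroy_workqueue(thing->workqueue);
	kfree(thing);
}

The ordering matters: destroying the workqueue first would leave a window in which a still-running timer callback could queue work on a dead workqueue, which is exactly the circular dependency timer_shutdown_sync() is meant to break.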
// SPDX-License-Identifier: GPL-2.0-only /* * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. These functions are needed by GSO/GRO implementation. */ #include <linux/export.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> #include <linux/netfilter.h> static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { return get_random_u32_above(0); } /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. * * This is similar to ipv6_select_ident() but we use an independent hash * seed to limit information leakage. * * The network header must be set before calling this. */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + offsetof(struct ipv6hdr, saddr), sizeof(buf), buf); if (!addrs) return 0; id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { u32 id; id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { case NEXTHDR_HOP: break; case NEXTHDR_ROUTING: found_rhdr = 1; break; case NEXTHDR_DEST: #if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif if (found_rhdr) return offset; break; default: return offset; } if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) return -EINVAL; exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); offset += ipv6_optlen(exthdr); if (offset > IPV6_MAXPLEN) return -EINVAL; *nexthdr = &exthdr->nexthdr; } return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); #if IS_ENABLED(CONFIG_IPV6) int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) hoplimit = idev->cnf.hop_limit; else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; rcu_read_unlock(); } return hoplimit; } EXPORT_SYMBOL(ip6_dst_hoplimit); #endif int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int len; len = skb->len - sizeof(struct ipv6hdr); if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); /* if 
egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out(sk, skb); if (unlikely(!skb)) return 0; skb->protocol = htons(ETH_P_IPV6); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = __ip6_local_out(net, sk, skb); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } EXPORT_SYMBOL_GPL(ip6_local_out);
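As a usage note, the helpers above are typically consumed by fragmentation/segmentation paths. The sketch below is illustrative only and not part of output_core.c; example_build_frag_hdr() is a hypothetical function showing how ip6_find_1stfragopt() and ipv6_select_ident() fit together when filling in an IPv6 fragment header, with the actual skb manipulation elided.

/*
 * Illustrative sketch (not part of output_core.c): locate where a fragment
 * header belongs and fill one in for the first fragment of a packet.
 */
#include <linux/skbuff.h>
#include <net/ipv6.h>

static int example_build_frag_hdr(struct net *net, struct sk_buff *skb,
				  struct frag_hdr *fh)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	u8 *prevhdr;
	int hdr_off;

	/* Offset (from the network header) at which a fragment header belongs. */
	hdr_off = ip6_find_1stfragopt(skb, &prevhdr);
	if (hdr_off < 0)
		return hdr_off;

	fh->nexthdr = *prevhdr;		/* protocol that will follow the fragment header */
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* first fragment: offset 0, more-fragments bit set */
	fh->identification = ipv6_select_ident(net, &ip6h->daddr, &ip6h->saddr);

	/*
	 * A real caller would now make room for the header at hdr_off, copy
	 * *fh in and set *prevhdr to NEXTHDR_FRAGMENT; that skb surgery is
	 * omitted here.
	 */
	return hdr_off;
}

Only the lookup and identification steps are shown; inserting the header into the packet is left to the caller.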