Total coverage: 251642 (17%)of 1548218
72 72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 // SPDX-License-Identifier: GPL-2.0-only /* * linux/net/sunrpc/sunrpc_syms.c * * Symbols exported by the sunrpc module. * * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> */ #include <linux/module.h> #include <linux/types.h> #include <linux/uio.h> #include <linux/unistd.h> #include <linux/init.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/auth.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/xprtsock.h> #include "sunrpc.h" #include "sysfs.h" #include "netns.h" unsigned int sunrpc_net_id; EXPORT_SYMBOL_GPL(sunrpc_net_id); static __net_init int sunrpc_init_net(struct net *net) { int err; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); err = rpc_proc_init(net); if (err) goto err_proc; err = ip_map_cache_create(net); if (err) goto err_ipmap; err = unix_gid_cache_create(net); if (err) goto err_unixgid; err = rpc_pipefs_init_net(net); if (err) goto err_pipefs; INIT_LIST_HEAD(&sn->all_clients); spin_lock_init(&sn->rpc_client_lock); spin_lock_init(&sn->rpcb_clnt_lock); return 0; err_pipefs: unix_gid_cache_destroy(net); err_unixgid: ip_map_cache_destroy(net); err_ipmap: rpc_proc_exit(net); err_proc: return err; } static __net_exit void sunrpc_exit_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); rpc_pipefs_exit_net(net); unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); WARN_ON_ONCE(!list_empty(&sn->all_clients)); } static struct pernet_operations sunrpc_net_ops = { .init = sunrpc_init_net, .exit = sunrpc_exit_net, .id = &sunrpc_net_id, .size = sizeof(struct sunrpc_net), }; static int __init init_sunrpc(void) { int err = rpc_init_mempool(); if (err) goto out; err = rpcauth_init_module(); if (err) goto out2; cache_initialize(); err = register_pernet_subsys(&sunrpc_net_ops); if (err) goto out3; err = register_rpc_pipefs(); if (err) goto out4; err = rpc_sysfs_init(); if (err) goto out5; sunrpc_debugfs_init(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; out5: unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: rpcauth_remove_module(); out2: rpc_destroy_mempool(); out: return err; } static void __exit cleanup_sunrpc(void) { rpc_sysfs_exit(); rpc_cleanup_clids(); xprt_cleanup_ids(); xprt_multipath_cleanup_ids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); sunrpc_debugfs_exit(); unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); auth_domain_cleanup(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_DESCRIPTION("Sun RPC core"); MODULE_LICENSE("GPL"); fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */ module_exit(cleanup_sunrpc);
177 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 2019 Google LLC */ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H #include <linux/types.h> enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_INVALID, BLK_ENCRYPTION_MODE_AES_256_XTS, BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, BLK_ENCRYPTION_MODE_ADIANTUM, BLK_ENCRYPTION_MODE_SM4_XTS, BLK_ENCRYPTION_MODE_MAX, }; #define BLK_CRYPTO_MAX_KEY_SIZE 64 /** * struct blk_crypto_config - an inline encryption key's crypto configuration * @crypto_mode: encryption algorithm this key is for * @data_unit_size: the data unit size for all encryption/decryptions with this * key. This is the size in bytes of each individual plaintext and * ciphertext. This is always a power of 2. It might be e.g. the * filesystem block size or the disk sector size. * @dun_bytes: the maximum number of bytes of DUN used when using this key */ struct blk_crypto_config { enum blk_crypto_mode_num crypto_mode; unsigned int data_unit_size; unsigned int dun_bytes; }; /** * struct blk_crypto_key - an inline encryption key * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this * key * @data_unit_size_bits: log2 of data_unit_size * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) * @raw: the raw bytes of this key. Only the first @size bytes are used. * * A blk_crypto_key is immutable once created, and many bios can reference it at * the same time. It must not be freed until all bios using it have completed * and it has been evicted from all devices on which it may have been used. */ struct blk_crypto_key { struct blk_crypto_config crypto_cfg; unsigned int data_unit_size_bits; unsigned int size; u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 #define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE / sizeof(u64)) /** * struct bio_crypt_ctx - an inline encryption context * @bc_key: the key, algorithm, and data unit size to use * @bc_dun: the data unit number (starting IV) to use * * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for * write requests) or decrypted (for read requests) inline by the storage device * or controller, or by the crypto API fallback. */ struct bio_crypt_ctx { const struct blk_crypto_key *bc_key; u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; }; #include <linux/blk_types.h> #include <linux/blkdev.h> #ifdef CONFIG_BLK_INLINE_ENCRYPTION static inline bool bio_has_crypt_ctx(struct bio *bio) { return bio->bi_crypt_context; } void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key, const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], gfp_t gfp_mask); bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, unsigned int bytes, const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size); int blk_crypto_start_using_key(struct block_device *bdev, const struct blk_crypto_key *key); void blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key); bool blk_crypto_config_supported_natively(struct block_device *bdev, const struct blk_crypto_config *cfg); bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_has_crypt_ctx(struct bio *bio) { return false; } #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); /** * bio_crypt_clone - clone bio encryption context * @dst: destination bio * @src: source bio * @gfp_mask: memory allocation flags * * If @src has an encryption context, clone it to @dst. * * Return: 0 on success, -ENOMEM if out of memory. -ENOMEM is only possible if * @gfp_mask doesn't include %__GFP_DIRECT_RECLAIM. */ static inline int bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) { if (bio_has_crypt_ctx(src)) return __bio_crypt_clone(dst, src, gfp_mask); return 0; } #endif /* __LINUX_BLK_CRYPTO_H */
23 10 10 19 5 2 1 23 4 23 14 14 23 23 1 23 23 8 3 3 3 3 12 12 12 12 12 12 2 6 2 7 7 10 7 2 4 6 7 2 5 2 5 5 1 8 2 4 1 1 2 9 9 9 9 6 2 1 1 1 1 1 12 3 10 10 10 1 1 9 9 3 1 23 13 13 19 19 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 /* * 8259 interrupt controller emulation * * Copyright (c) 2003-2004 Fabrice Bellard * Copyright (c) 2007 Intel Corporation * Copyright 2009 Red Hat, Inc. and/or its affiliates. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * Authors: * Yaozu (Eddie) Dong <Eddie.dong@intel.com> * Port from Qemu. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/slab.h> #include <linux/bitops.h> #include "irq.h" #include <linux/kvm_host.h> #include "trace.h" #define pr_pic_unimpl(fmt, ...) \ pr_err_ratelimited("pic: " fmt, ## __VA_ARGS__) static void pic_irq_request(struct kvm *kvm, int level); static void pic_lock(struct kvm_pic *s) __acquires(&s->lock) { spin_lock(&s->lock); } static void pic_unlock(struct kvm_pic *s) __releases(&s->lock) { bool wakeup = s->wakeup_needed; struct kvm_vcpu *vcpu; unsigned long i; s->wakeup_needed = false; spin_unlock(&s->lock); if (wakeup) { kvm_for_each_vcpu(i, vcpu, s->kvm) { if (kvm_apic_accept_pic_intr(vcpu)) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); return; } } } } static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); if (s != &s->pics_state->pics[0]) irq += 8; /* * We are dropping lock while calling ack notifiers since ack * notifier callbacks for assigned devices call into PIC recursively. * Other interrupt may be delivered to PIC while lock is dropped but * it should be safe since PIC state is already updated at this stage. */ pic_unlock(s->pics_state); kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); pic_lock(s->pics_state); } /* * set irq level. If an edge is detected, then the IRR is set to 1 */ static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) { int mask, ret = 1; mask = 1 << irq; if (s->elcr & mask) /* level triggered */ if (level) { ret = !(s->irr & mask); s->irr |= mask; s->last_irr |= mask; } else { s->irr &= ~mask; s->last_irr &= ~mask; } else /* edge triggered */ if (level) { if ((s->last_irr & mask) == 0) { ret = !(s->irr & mask); s->irr |= mask; } s->last_irr |= mask; } else s->last_irr &= ~mask; return (s->imr & mask) ? -1 : ret; } /* * return the highest priority found in mask (highest = smallest * number). Return 8 if no irq */ static inline int get_priority(struct kvm_kpic_state *s, int mask) { int priority; if (mask == 0) return 8; priority = 0; while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0) priority++; return priority; } /* * return the pic wanted interrupt. return -1 if none */ static int pic_get_irq(struct kvm_kpic_state *s) { int mask, cur_priority, priority; mask = s->irr & ~s->imr; priority = get_priority(s, mask); if (priority == 8) return -1; /* * compute current priority. If special fully nested mode on the * master, the IRQ coming from the slave is not taken into account * for the priority computation. */ mask = s->isr; if (s->special_fully_nested_mode && s == &s->pics_state->pics[0]) mask &= ~(1 << 2); cur_priority = get_priority(s, mask); if (priority < cur_priority) /* * higher priority found: an irq should be generated */ return (priority + s->priority_add) & 7; else return -1; } /* * raise irq to CPU if necessary. must be called every time the active * irq may change */ static void pic_update_irq(struct kvm_pic *s) { int irq2, irq; irq2 = pic_get_irq(&s->pics[1]); if (irq2 >= 0) { /* * if irq request by slave pic, signal master PIC */ pic_set_irq1(&s->pics[0], 2, 1); pic_set_irq1(&s->pics[0], 2, 0); } irq = pic_get_irq(&s->pics[0]); pic_irq_request(s->kvm, irq >= 0); } void kvm_pic_update_irq(struct kvm_pic *s) { pic_lock(s); pic_update_irq(s); pic_unlock(s); } int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) { int ret, irq_level; BUG_ON(irq < 0 || irq >= PIC_NUM_PINS); pic_lock(s); irq_level = __kvm_irq_line_state(&s->irq_states[irq], irq_source_id, level); ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); pic_update_irq(s); trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, s->pics[irq >> 3].imr, ret == 0); pic_unlock(s); return ret; } void kvm_pic_clear_all(struct kvm_pic *s, int irq_source_id) { int i; pic_lock(s); for (i = 0; i < PIC_NUM_PINS; i++) __clear_bit(irq_source_id, &s->irq_states[i]); pic_unlock(s); } /* * acknowledge interrupt 'irq' */ static inline void pic_intack(struct kvm_kpic_state *s, int irq) { s->isr |= 1 << irq; /* * We don't clear a level sensitive interrupt here */ if (!(s->elcr & (1 << irq))) s->irr &= ~(1 << irq); if (s->auto_eoi) { if (s->rotate_on_auto_eoi) s->priority_add = (irq + 1) & 7; pic_clear_isr(s, irq); } } int kvm_pic_read_irq(struct kvm *kvm) { int irq, irq2, intno; struct kvm_pic *s = kvm->arch.vpic; s->output = 0; pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); if (irq == 2) { irq2 = pic_get_irq(&s->pics[1]); if (irq2 >= 0) pic_intack(&s->pics[1], irq2); else /* * spurious IRQ on slave controller */ irq2 = 7; intno = s->pics[1].irq_base + irq2; } else intno = s->pics[0].irq_base + irq; } else { /* * spurious IRQ on host controller */ irq = 7; intno = s->pics[0].irq_base + irq; } pic_update_irq(s); pic_unlock(s); return intno; } static void kvm_pic_reset(struct kvm_kpic_state *s) { int irq; unsigned long i; struct kvm_vcpu *vcpu; u8 edge_irr = s->irr & ~s->elcr; bool found = false; s->last_irr = 0; s->irr &= s->elcr; s->imr = 0; s->priority_add = 0; s->special_mask = 0; s->read_reg_select = 0; if (!s->init4) { s->special_fully_nested_mode = 0; s->auto_eoi = 0; } s->init_state = 1; kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) if (kvm_apic_accept_pic_intr(vcpu)) { found = true; break; } if (!found) return; for (irq = 0; irq < PIC_NUM_PINS/2; irq++) if (edge_irr & (1 << irq)) pic_clear_isr(s, irq); } static void pic_ioport_write(void *opaque, u32 addr, u32 val) { struct kvm_kpic_state *s = opaque; int priority, cmd, irq; addr &= 1; if (addr == 0) { if (val & 0x10) { s->init4 = val & 1; if (val & 0x02) pr_pic_unimpl("single mode not supported"); if (val & 0x08) pr_pic_unimpl( "level sensitive irq not supported"); kvm_pic_reset(s); } else if (val & 0x08) { if (val & 0x04) s->poll = 1; if (val & 0x02) s->read_reg_select = val & 1; if (val & 0x40) s->special_mask = (val >> 5) & 1; } else { cmd = val >> 5; switch (cmd) { case 0: case 4: s->rotate_on_auto_eoi = cmd >> 2; break; case 1: /* end of interrupt */ case 5: priority = get_priority(s, s->isr); if (priority != 8) { irq = (priority + s->priority_add) & 7; if (cmd == 5) s->priority_add = (irq + 1) & 7; pic_clear_isr(s, irq); pic_update_irq(s->pics_state); } break; case 3: irq = val & 7; pic_clear_isr(s, irq); pic_update_irq(s->pics_state); break; case 6: s->priority_add = (val + 1) & 7; pic_update_irq(s->pics_state); break; case 7: irq = val & 7; s->priority_add = (irq + 1) & 7; pic_clear_isr(s, irq); pic_update_irq(s->pics_state); break; default: break; /* no operation */ } } } else switch (s->init_state) { case 0: { /* normal mode */ u8 imr_diff = s->imr ^ val, off = (s == &s->pics_state->pics[0]) ? 0 : 8; s->imr = val; for (irq = 0; irq < PIC_NUM_PINS/2; irq++) if (imr_diff & (1 << irq)) kvm_fire_mask_notifiers( s->pics_state->kvm, SELECT_PIC(irq + off), irq + off, !!(s->imr & (1 << irq))); pic_update_irq(s->pics_state); break; } case 1: s->irq_base = val & 0xf8; s->init_state = 2; break; case 2: if (s->init4) s->init_state = 3; else s->init_state = 0; break; case 3: s->special_fully_nested_mode = (val >> 4) & 1; s->auto_eoi = (val >> 1) & 1; s->init_state = 0; break; } } static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) { int ret; ret = pic_get_irq(s); if (ret >= 0) { if (addr1 >> 7) { s->pics_state->pics[0].isr &= ~(1 << 2); s->pics_state->pics[0].irr &= ~(1 << 2); } s->irr &= ~(1 << ret); pic_clear_isr(s, ret); if (addr1 >> 7 || ret != 2) pic_update_irq(s->pics_state); /* Bit 7 is 1, means there's an interrupt */ ret |= 0x80; } else { /* Bit 7 is 0, means there's no interrupt */ ret = 0x07; pic_update_irq(s->pics_state); } return ret; } static u32 pic_ioport_read(void *opaque, u32 addr) { struct kvm_kpic_state *s = opaque; int ret; if (s->poll) { ret = pic_poll_read(s, addr); s->poll = 0; } else if ((addr & 1) == 0) if (s->read_reg_select) ret = s->isr; else ret = s->irr; else ret = s->imr; return ret; } static void elcr_ioport_write(void *opaque, u32 val) { struct kvm_kpic_state *s = opaque; s->elcr = val & s->elcr_mask; } static u32 elcr_ioport_read(void *opaque) { struct kvm_kpic_state *s = opaque; return s->elcr; } static int picdev_write(struct kvm_pic *s, gpa_t addr, int len, const void *val) { unsigned char data = *(unsigned char *)val; if (len != 1) { pr_pic_unimpl("non byte write\n"); return 0; } switch (addr) { case 0x20: case 0x21: pic_lock(s); pic_ioport_write(&s->pics[0], addr, data); pic_unlock(s); break; case 0xa0: case 0xa1: pic_lock(s); pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: case 0x4d1: pic_lock(s); elcr_ioport_write(&s->pics[addr & 1], data); pic_unlock(s); break; default: return -EOPNOTSUPP; } return 0; } static int picdev_read(struct kvm_pic *s, gpa_t addr, int len, void *val) { unsigned char *data = (unsigned char *)val; if (len != 1) { memset(val, 0, len); pr_pic_unimpl("non byte read\n"); return 0; } switch (addr) { case 0x20: case 0x21: case 0xa0: case 0xa1: pic_lock(s); *data = pic_ioport_read(&s->pics[addr >> 7], addr); pic_unlock(s); break; case 0x4d0: case 0x4d1: pic_lock(s); *data = elcr_ioport_read(&s->pics[addr & 1]); pic_unlock(s); break; default: return -EOPNOTSUPP; } return 0; } static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } static int picdev_elcr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_elcr), addr, len, val); } static int picdev_elcr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_elcr), addr, len, val); } /* * callback when PIC0 irq status changed */ static void pic_irq_request(struct kvm *kvm, int level) { struct kvm_pic *s = kvm->arch.vpic; if (!s->output) s->wakeup_needed = true; s->output = level; } static const struct kvm_io_device_ops picdev_master_ops = { .read = picdev_master_read, .write = picdev_master_write, }; static const struct kvm_io_device_ops picdev_slave_ops = { .read = picdev_slave_read, .write = picdev_slave_write, }; static const struct kvm_io_device_ops picdev_elcr_ops = { .read = picdev_elcr_read, .write = picdev_elcr_write, }; int kvm_pic_init(struct kvm *kvm) { struct kvm_pic *s; int ret; s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT); if (!s) return -ENOMEM; spin_lock_init(&s->lock); s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; s->pics[0].pics_state = s; s->pics[1].pics_state = s; /* * Initialize PIO device */ kvm_iodevice_init(&s->dev_master, &picdev_master_ops); kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops); kvm_iodevice_init(&s->dev_elcr, &picdev_elcr_ops); mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev_master); if (ret < 0) goto fail_unlock; ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave); if (ret < 0) goto fail_unreg_2; ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_elcr); if (ret < 0) goto fail_unreg_1; mutex_unlock(&kvm->slots_lock); kvm->arch.vpic = s; return 0; fail_unreg_1: kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); fail_unreg_2: kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master); fail_unlock: mutex_unlock(&kvm->slots_lock); kfree(s); return ret; } void kvm_pic_destroy(struct kvm *kvm) { struct kvm_pic *vpic = kvm->arch.vpic; if (!vpic) return; mutex_lock(&kvm->slots_lock); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_elcr); mutex_unlock(&kvm->slots_lock); kvm->arch.vpic = NULL; kfree(vpic); }
1 2 1 1 1 1 4 4 1 9 2 2 1 19 19 1 19 17 12 8 4 1 1 2 4 6 1 1 4 1 1 1 1 2 2 2 2 2 2 2 2 5 1 2 1 3 8 1 4 3 422 15 420 422 423 423 72 1 71 7 44 8 38 11 28 2 3 7 6 2 2 2 2 2 2 2 9 7 2 2 4 4 3 1 2 5 2 3 3 13 1 6 5 6 10 6 4 10 10 12 8 2 1 1 1 5 4 2 2 11 1 4 1 7 6 1 5 11 1 2 3 1 6 4 4 8 2 1 1 5 6 3 1 2 3 54 54 53 2 2 2 2 495 418 11 466 6 7 5 5 454 453 104 11 1 10 196 408 3 406 425 63 17 54 54 10 1 4 5 1 1 11 10 10 2 2 2 10 22 22 12 12 4 12 7 9 22 5 5 3 5 5 5 3 1 1 7 4 2 1 1 1 3 4 1 1 2 3 2 386 82 336 81 336 2 386 393 3 384 384 248 374 390 23 1 22 1 9 10 22 6 6 9 2 7 5 2 39 1 1 3 1 1 32 5 11 8 1 1 7 8 7 3 4 4 5 4 2 2 2 2 1 1 1 4 4 1 3 3 3 1 1 2 1 1 2 2 2 1 2 2 1 2 2 3 2 14 8 2 5 382 34 37 37 102 71 31 1 32 32 6 6 4 5 5 12 1 6 3 3 5 5 4 1 3 1 2 2 4 62 68 68 67 5 61 1 3 58 61 67 9 3 1 4 11 4 4 5 22 22 11 5 5 7 5 5 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/base.c * * Copyright (C) 1991, 1992 Linus Torvalds * * proc base directory handling functions * * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. * Instead of using magical inumbers to determine the kind of object * we allocate and fill in-core inodes upon lookup. They don't even * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. * * * Changelog: * 17-Jan-2005 * Allan Bezerra * Bruna Moreira <bruna.moreira@indt.org.br> * Edjard Mota <edjard.mota@indt.org.br> * Ilias Biris <ilias.biris@indt.org.br> * Mauricio Lin <mauricio.lin@indt.org.br> * * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * * A new process specific entry (smaps) included in /proc. It shows the * size of rss for each memory area. The maps entry lacks information * about physical memory size (rss) for each mapped file, i.e., * rss information for executables and library files. * This additional information is useful for any tools that need to know * about physical memory consumption for a process specific library. * * Changelog: * 21-Feb-2005 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * Pud inclusion in the page table walking. * * ChangeLog: * 10-Mar-2005 * 10LE Instituto Nokia de Tecnologia - INdT: * A better way to walks through the page table as suggested by Hugh Dickins. * * Simo Piiroinen <simo.piiroinen@nokia.com>: * Smaps information related to shared, private, clean and dirty pages. * * Paul Mundt <paul.mundt@nokia.com>: * Overall revision about smaps. */ #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/generic-radix-tree.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> #include <linux/mnt_namespace.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> #include <linux/stacktrace.h> #include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/printk.h> #include <linux/cache.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/fs_parser.h> #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/debug.h> #include <linux/sched/stat.h> #include <linux/posix-timers.h> #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> #include <linux/ksm.h> #include <uapi/linux/lsm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" #include "../../lib/kstrtox.h" /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during * each system call not at open time. The reason is that most of * what we wish to check for permissions in /proc varies at runtime. * * The classic example of a problem is opening file descriptors * in /proc for a task before it execs a suid executable. */ static u8 nlink_tid __ro_after_init; static u8 nlink_tgid __ro_after_init; enum proc_mem_force { PROC_MEM_FORCE_ALWAYS, PROC_MEM_FORCE_PTRACE, PROC_MEM_FORCE_NEVER }; static enum proc_mem_force proc_mem_force_override __ro_after_init = IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER : IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE : PROC_MEM_FORCE_ALWAYS; static const struct constant_table proc_mem_force_table[] __initconst = { { "always", PROC_MEM_FORCE_ALWAYS }, { "ptrace", PROC_MEM_FORCE_PTRACE }, { "never", PROC_MEM_FORCE_NEVER }, { } }; static int __init early_proc_mem_force_override(char *buf) { if (!buf) return -EINVAL; /* * lookup_constant() defaults to proc_mem_force_override to preseve * the initial Kconfig choice in case an invalid param gets passed. */ proc_mem_force_override = lookup_constant(proc_mem_force_table, buf, proc_mem_force_override); return 0; } early_param("proc_mem.force_override", early_proc_mem_force_override); struct pid_entry { const char *name; unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; union proc_op op; }; #define NOD(NAME, MODE, IOP, FOP, OP) { \ .name = (NAME), \ .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ .op = OP, \ } #define DIR(NAME, MODE, iops, fops) \ NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) #define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ { .proc_get_link = get_link } ) #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) #define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) #define ATTR(LSMID, NAME, MODE) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_pid_attr_operations, \ { .lsmid = LSMID }) /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; } return count; } static int get_task_root(struct task_struct *task, struct path *root) { int result = -ENOENT; task_lock(task); if (task->fs) { get_fs_root(task->fs, root); result = 0; } task_unlock(task); return result; } static int proc_cwd_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { task_lock(task); if (task->fs) { get_fs_pwd(task->fs, path); result = 0; } task_unlock(task); put_task_struct(task); } return result; } static int proc_root_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { result = get_task_root(task, path); put_task_struct(task); } return result; } /* * If the user used setproctitle(), we just get the string from * user space at arg_start, and limit it to a maximum of one page. */ static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, size_t count, unsigned long pos, unsigned long arg_start) { char *page; int ret, got; if (pos >= PAGE_SIZE) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); if (got > 0) { int len = strnlen(page, got); /* Include the NUL character if it was found */ if (len < got) len++; if (len > pos) { len -= pos; if (len > count) len = count; len -= copy_to_user(buf, page+pos, len); if (!len) len = -EFAULT; ret = len; } } free_page((unsigned long)page); return ret; } static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) return 0; spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); if (arg_start >= arg_end) return 0; /* * We allow setproctitle() to overwrite the argument * strings, and overflow past the original end. But * only when it overflows into the environment area. */ if (env_start != arg_end || env_end < env_start) env_start = env_end = arg_end; len = env_end - arg_start; /* We're not going to care if "*ppos" has high bits set */ pos = *ppos; if (pos >= len) return 0; if (count > len - pos) count = len - pos; if (!count) return 0; /* * Magical special case: if the argv[] end byte is not * zero, the user has overwritten it with setproctitle(3). * * Possible future enhancement: do this only once when * pos is 0, and set a flag in the 'struct file'. */ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) return get_mm_proctitle(mm, buf, count, pos, arg_start); /* * For the non-setproctitle() case we limit things strictly * to the [arg_start, arg_end[ range. */ pos += arg_start; if (pos < arg_start || pos >= arg_end) return 0; if (count > arg_end - pos) count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; len = 0; while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); got = access_remote_vm(mm, pos, page, size, FOLL_ANON); if (got <= 0) break; got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; break; } pos += got; buf += got; len += got; count -= got; } free_page((unsigned long)page); return len; } static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, size_t count, loff_t *pos) { struct mm_struct *mm; ssize_t ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = get_mm_cmdline(mm, buf, count, pos); mmput(mm); return ret; } static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct task_struct *tsk; ssize_t ret; BUG_ON(*pos < 0); tsk = get_proc_task(file_inode(file)); if (!tsk) return -ESRCH; ret = get_task_cmdline(tsk, buf, count, pos); put_task_struct(tsk); if (ret > 0) *pos += ret; return ret; } static const struct file_operations proc_pid_cmdline_ops = { .read = proc_pid_cmdline_read, .llseek = generic_file_llseek, }; #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. * Returns the resolved symbol. If that fails, simply return the address. */ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto print0; wchan = get_wchan(task); if (wchan && !lookup_symbol_name(wchan, symname)) { seq_puts(m, symname); return 0; } print0: seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */ static int lock_trace(struct task_struct *task) { int err = down_read_killable(&task->signal->exec_update_lock); if (err) return err; if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { up_read(&task->signal->exec_update_lock); return -EPERM; } return 0; } static void unlock_trace(struct task_struct *task) { up_read(&task->signal->exec_update_lock); } #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long *entries; int err; /* * The ability to racily run the kernel stack unwinder on a running task * and then observe the unwinder output is scary; while it is useful for * debugging kernel issues, it can also allow an attacker to leak kernel * stack contents. * Doing this in a manner that is at least safe from races would require * some work to ensure that the remote task can not be scheduled; and * even then, this would still expose the unwinder as local attack * surface. * Therefore, this interface is restricted to root. */ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) return -EACCES; entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; err = lock_trace(task); if (!err) { unsigned int i, nr_entries; nr_entries = stack_trace_save_tsk(task, entries, MAX_STACK_TRACE_DEPTH, 0); for (i = 0; i < nr_entries; i++) { seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } unlock_trace(task); } kfree(entries); return err; } #endif #ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { if (unlikely(!sched_info_on())) seq_puts(m, "0 0 0\n"); else seq_printf(m, "%llu %llu %lu\n", (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); return 0; } #endif #ifdef CONFIG_LATENCYTOP static int lstats_show_proc(struct seq_file *m, void *v) { int i; struct inode *inode = m->private; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; seq_puts(m, "Latency Top version : v0.1\n"); for (i = 0; i < LT_SAVECOUNT; i++) { struct latency_record *lr = &task->latency_record[i]; if (lr->backtrace[0]) { int q; seq_printf(m, "%i %li %li", lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { unsigned long bt = lr->backtrace[q]; if (!bt) break; seq_printf(m, " %ps", (void *)bt); } seq_putc(m, '\n'); } } put_task_struct(task); return 0; } static int lstats_open(struct inode *inode, struct file *file) { return single_open(file, lstats_show_proc, inode); } static ssize_t lstats_write(struct file *file, const char __user *buf, size_t count, loff_t *offs) { struct task_struct *task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; clear_tsk_latency_tracing(task); put_task_struct(task); return count; } static const struct file_operations proc_lstats_operations = { .open = lstats_open, .read = seq_read, .write = lstats_write, .llseek = seq_lseek, .release = single_release, }; #endif static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; long badness; badness = oom_badness(task, totalpages); /* * Special case OOM_SCORE_ADJ_MIN for all others scale the * badness value into [0, 2000] range which we have been * exporting for a long time so userspace might depend on it. */ if (badness != LONG_MIN) points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; seq_printf(m, "%lu\n", points); return 0; } struct limit_names { const char *name; const char *unit; }; static const struct limit_names lnames[RLIM_NLIMITS] = { [RLIMIT_CPU] = {"Max cpu time", "seconds"}, [RLIMIT_FSIZE] = {"Max file size", "bytes"}, [RLIMIT_DATA] = {"Max data size", "bytes"}, [RLIMIT_STACK] = {"Max stack size", "bytes"}, [RLIMIT_CORE] = {"Max core file size", "bytes"}, [RLIMIT_RSS] = {"Max resident set", "bytes"}, [RLIMIT_NPROC] = {"Max processes", "processes"}, [RLIMIT_NOFILE] = {"Max open files", "files"}, [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, [RLIMIT_AS] = {"Max address space", "bytes"}, [RLIMIT_LOCKS] = {"Max file locks", "locks"}, [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, [RLIMIT_NICE] = {"Max nice priority", NULL}, [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, }; /* Display limits for a process */ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned int i; unsigned long flags; struct rlimit rlim[RLIM_NLIMITS]; if (!lock_task_sighand(task, &flags)) return 0; memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); /* * print the file header */ seq_puts(m, "Limit " "Soft Limit " "Hard Limit " "Units \n"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) seq_printf(m, "%-25s %-20s ", lnames[i].name, "unlimited"); else seq_printf(m, "%-25s %-20lu ", lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) seq_printf(m, "%-20s ", "unlimited"); else seq_printf(m, "%-20lu ", rlim[i].rlim_max); if (lnames[i].unit) seq_printf(m, "%-10s\n", lnames[i].unit); else seq_putc(m, '\n'); } return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct syscall_info info; u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); else if (info.data.nr < 0) seq_printf(m, "%d 0x%llx 0x%llx\n", info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ /* permission checks */ static bool proc_fd_access_allowed(struct inode *inode) { struct task_struct *task; bool allowed = false; /* Allow access to a task's file descriptors if it is us or we * may use ptrace attach to the process and find out that * information. */ task = get_proc_task(inode); if (task) { allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; } int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = d_inode(dentry); if (attr->ia_valid & ATTR_MODE) return -EPERM; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); return 0; } /* * May current process learn task's sched/cmdline info (for hide_pid_min=1) * or euid/egid (for hide_pid_min=2)? */ static bool has_pid_permissions(struct proc_fs_info *fs_info, struct task_struct *task, enum proc_hidepid hide_pid_min) { /* * If 'hidpid' mount option is set force a ptrace check, * we indicate that we are using a filesystem syscall * by passing PTRACE_MODE_READ_FSCREDS */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); if (fs_info->hide_pid < hide_pid_min) return true; if (in_group_p(fs_info->pid_gid)) return true; return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } static int proc_pid_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; bool has_perms; task = get_proc_task(inode); if (!task) return -ESRCH; has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { if (fs_info->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process * may not stat() a file, it shouldn't be seen * in procfs at all. */ return -ENOENT; } return -EPERM; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); put_task_struct(task); return ret; } static int proc_single_open(struct inode *inode, struct file *filp) { return single_open(filp, proc_single_show, inode); } static const struct file_operations proc_single_file_operations = { .open = proc_single_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); struct mm_struct *mm = ERR_PTR(-ESRCH); if (task) { mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ mmgrab(mm); /* but do not pin its memory */ mmput(mm); } } return mm; } static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); file->private_data = mm; return 0; } static int mem_open(struct inode *inode, struct file *file) { if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) return -EINVAL; return __mem_open(inode, file, PTRACE_MODE_ATTACH); } static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm) { struct task_struct *task; bool ptrace_active = false; switch (proc_mem_force_override) { case PROC_MEM_FORCE_NEVER: return false; case PROC_MEM_FORCE_PTRACE: task = get_proc_task(file_inode(file)); if (task) { ptrace_active = READ_ONCE(task->ptrace) && READ_ONCE(task->mm) == mm && READ_ONCE(task->parent) == current; put_task_struct(task); } return ptrace_active; default: return true; } } static ssize_t mem_rw(struct file *file, char __user *buf, size_t count, loff_t *ppos, int write) { struct mm_struct *mm = file->private_data; unsigned long addr = *ppos; ssize_t copied; char *page; unsigned int flags; if (!mm) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; copied = 0; if (!mmget_not_zero(mm)) goto free; flags = write ? FOLL_WRITE : 0; if (proc_mem_foll_force(file, mm)) flags |= FOLL_FORCE; while (count > 0) { size_t this_len = min_t(size_t, count, PAGE_SIZE); if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; break; } if (!write && copy_to_user(buf, page, this_len)) { copied = -EFAULT; break; } buf += this_len; addr += this_len; copied += this_len; count -= this_len; } *ppos = addr; mmput(mm); free: free_page((unsigned long) page); return copied; } static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, buf, count, ppos, 0); } static ssize_t mem_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, (char __user*)buf, count, ppos, 1); } loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: file->f_pos = offset; break; case 1: file->f_pos += offset; break; default: return -EINVAL; } force_successful_syscall_return(); return file->f_pos; } static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) mmdrop(mm); return 0; } static const struct file_operations proc_mem_operations = { .llseek = mem_lseek, .read = mem_read, .write = mem_write, .open = mem_open, .release = mem_release, .fop_flags = FOP_UNSIGNED_OFFSET, }; static int environ_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ); } static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { char *page; unsigned long src = *ppos; int ret = 0; struct mm_struct *mm = file->private_data; unsigned long env_start, env_end; /* Ensure the process spawned far enough to have an environment. */ if (!mm || !mm->env_end) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; if (!mmget_not_zero(mm)) goto free; spin_lock(&mm->arg_lock); env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); while (count > 0) { size_t this_len, max_len; int retval; if (src >= (env_end - env_start)) break; this_len = env_end - (env_start + src); max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; break; } if (copy_to_user(buf, page, retval)) { ret = -EFAULT; break; } ret += retval; src += retval; buf += retval; count -= retval; } *ppos = src; mmput(mm); free: free_page((unsigned long) page); return ret; } static const struct file_operations proc_environ_operations = { .open = environ_open, .read = environ_read, .llseek = generic_file_llseek, .release = mem_release, }; static int auxv_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); } static ssize_t auxv_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct mm_struct *mm = file->private_data; unsigned int nwords = 0; if (!mm) return 0; do { nwords += 2; } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0])); } static const struct file_operations proc_auxv_operations = { .open = auxv_open, .read = auxv_read, .llseek = generic_file_llseek, .release = mem_release, }; static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; int oom_adj = OOM_ADJUST_MIN; size_t len; if (!task) return -ESRCH; if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) oom_adj = OOM_ADJUST_MAX; else oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; put_task_struct(task); if (oom_adj > OOM_ADJUST_MAX) oom_adj = OOM_ADJUST_MAX; len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) { struct mm_struct *mm = NULL; struct task_struct *task; int err = 0; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mutex_lock(&oom_adj_mutex); if (legacy) { if (oom_adj < task->signal->oom_score_adj && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } /* * /proc/pid/oom_adj is provided for legacy purposes, ask users to use * /proc/pid/oom_score_adj instead. */ pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); } else { if ((short)oom_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } } /* * Make sure we will check other processes sharing the mm if this is * not vfrok which wants its own oom_score_adj. * pin the mm so it doesn't go away and get reused after task_unlock */ if (!task->vfork_done) { struct task_struct *p = find_lock_task_mm(task); if (p) { if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) { mm = p->mm; mmgrab(mm); } task_unlock(p); } } task->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = (short)oom_adj; trace_oom_score_adj_update(task); if (mm) { struct task_struct *p; rcu_read_lock(); for_each_process(p) { if (same_thread_group(task, p)) continue; /* do not touch kernel threads or the global init */ if (p->flags & PF_KTHREAD || is_global_init(p)) continue; task_lock(p); if (!p->vfork_done && process_shares_mm(p, mm)) { p->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) p->signal->oom_score_adj_min = (short)oom_adj; } task_unlock(p); } rcu_read_unlock(); mmdrop(mm); } err_unlock: mutex_unlock(&oom_adj_mutex); put_task_struct(task); return err; } /* * /proc/pid/oom_adj exists solely for backwards compatibility with previous * kernels. The effective policy is defined by oom_score_adj, which has a * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. * Values written to oom_adj are simply mapped linearly to oom_score_adj. * Processes that become oom disabled via oom_adj will still be oom disabled * with this implementation. * * oom_adj cannot be removed since existing userspace binaries use it. */ static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_adj); if (err) goto out; if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && oom_adj != OOM_DISABLE) { err = -EINVAL; goto out; } /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum * value is always attainable. */ if (oom_adj == OOM_ADJUST_MAX) oom_adj = OOM_SCORE_ADJ_MAX; else oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; err = __set_oom_adj(file, oom_adj, true); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, .llseek = generic_file_llseek, }; static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; short oom_score_adj = OOM_SCORE_ADJ_MIN; size_t len; if (!task) return -ESRCH; oom_score_adj = task->signal->oom_score_adj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_score_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); if (err) goto out; if (oom_score_adj < OOM_SCORE_ADJ_MIN || oom_score_adj > OOM_SCORE_ADJ_MAX) { err = -EINVAL; goto out; } err = __set_oom_adj(file, oom_score_adj, false); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_score_adj_operations = { .read = oom_score_adj_read, .write = oom_score_adj_write, .llseek = default_llseek, }; #ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", from_kuid(file->f_cred->user_ns, audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); uid_t loginuid; kuid_t kloginuid; int rv; /* Don't let kthreads write their own loginuid */ if (current->flags & PF_KTHREAD) return -EPERM; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { rcu_read_unlock(); return -EPERM; } rcu_read_unlock(); if (*ppos != 0) { /* No partial writes. */ return -EINVAL; } rv = kstrtou32_from_user(buf, count, 10, &loginuid); if (rv < 0) return rv; /* is userspace tring to explicitly UNSET the loginuid? */ if (loginuid == AUDIT_UID_UNSET) { kloginuid = INVALID_UID; } else { kloginuid = make_kuid(file->f_cred->user_ns, loginuid); if (!uid_valid(kloginuid)) return -EINVAL; } rv = audit_set_loginuid(kloginuid); if (rv < 0) return rv; return count; } static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, .llseek = generic_file_llseek, }; static ssize_t proc_sessionid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", audit_get_sessionid(task)); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static const struct file_operations proc_sessionid_operations = { .read = proc_sessionid_read, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_FAULT_INJECTION static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; if (!task) return -ESRCH; make_it_fail = task->make_it_fail; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t proc_fault_inject_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF] = {}; int make_it_fail; int rv; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); if (rv < 0) return rv; if (make_it_fail < 0 || make_it_fail > 1) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; put_task_struct(task); return count; } static const struct file_operations proc_fault_inject_operations = { .read = proc_fault_inject_read, .write = proc_fault_inject_write, .llseek = generic_file_llseek, }; static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; int err; unsigned int n; err = kstrtouint_from_user(buf, count, 0, &n); if (err) return err; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->fail_nth = n; put_task_struct(task); return count; } static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char numbuf[PROC_NUMBUF]; ssize_t len; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, numbuf, len); } static const struct file_operations proc_fail_nth_operations = { .read = proc_fail_nth_read, .write = proc_fail_nth_write, }; #endif #ifdef CONFIG_SCHED_DEBUG /* * Print out various scheduling related per-task fields: */ static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_show_task(p, ns, m); put_task_struct(p); return 0; } static ssize_t sched_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_set_task(p); put_task_struct(p); return count; } static int sched_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_show, inode); } static const struct file_operations proc_pid_sched_operations = { .open = sched_open, .read = seq_read, .write = sched_write, .llseek = seq_lseek, .release = single_release, }; #endif #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: */ static int sched_autogroup_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_autogroup_show_task(p, m); put_task_struct(p); return 0; } static ssize_t sched_autogroup_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[PROC_NUMBUF] = {}; int nice; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; err = kstrtoint(strstrip(buffer), 0, &nice); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; err = proc_sched_autogroup_set_nice(p, nice); if (err) count = err; put_task_struct(p); return count; } static int sched_autogroup_open(struct inode *inode, struct file *filp) { int ret; ret = single_open(filp, sched_autogroup_show, NULL); if (!ret) { struct seq_file *m = filp->private_data; m->private = inode; } return ret; } static const struct file_operations proc_pid_sched_autogroup_operations = { .open = sched_autogroup_open, .read = seq_read, .write = sched_autogroup_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_SCHED_AUTOGROUP */ #ifdef CONFIG_TIME_NS static int timens_offsets_show(struct seq_file *m, void *v) { struct task_struct *p; p = get_proc_task(file_inode(m->file)); if (!p) return -ESRCH; proc_timens_show_offsets(p, m); put_task_struct(p); return 0; } static ssize_t timens_offsets_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); struct proc_timens_offset offsets[2]; char *kbuf = NULL, *pos, *next_line; struct task_struct *p; int ret, noffsets; /* Only allow < page size writes at the beginning of the file */ if ((*ppos != 0) || (count >= PAGE_SIZE)) return -EINVAL; /* Slurp in the user data */ kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); /* Parse the user data */ ret = -EINVAL; noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ next_line = strchr(pos, '\n'); if (next_line) { *next_line = '\0'; next_line++; if (*next_line == '\0') next_line = NULL; } err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; clock[sizeof(clock) - 1] = 0; if (strcmp(clock, "monotonic") == 0 || strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) off->clockid = CLOCK_MONOTONIC; else if (strcmp(clock, "boottime") == 0 || strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) off->clockid = CLOCK_BOOTTIME; else goto out; noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) count = next_line - kbuf; break; } } ret = -ESRCH; p = get_proc_task(inode); if (!p) goto out; ret = proc_timens_set_offset(file, p, offsets, noffsets); put_task_struct(p); if (ret) goto out; ret = count; out: kfree(kbuf); return ret; } static int timens_offsets_open(struct inode *inode, struct file *filp) { return single_open(filp, timens_offsets_show, inode); } static const struct file_operations proc_timens_offsets_operations = { .open = timens_offsets_open, .read = seq_read, .write = timens_offsets_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_TIME_NS */ static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN] = {}; const size_t maxlen = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; p = get_proc_task(inode); if (!p) return -ESRCH; if (same_thread_group(current, p)) { set_task_comm(p, buffer); proc_comm_connector(p); } else count = -EINVAL; put_task_struct(p); return count; } static int comm_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_task_name(m, p, false); seq_putc(m, '\n'); put_task_struct(p); return 0; } static int comm_open(struct inode *inode, struct file *filp) { return single_open(filp, comm_show, inode); } static const struct file_operations proc_pid_set_comm_operations = { .open = comm_open, .read = seq_read, .write = comm_write, .llseek = seq_lseek, .release = single_release, }; static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; struct file *exe_file; task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; exe_file = get_task_exe_file(task); put_task_struct(task); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); fput(exe_file); return 0; } else return -ENOENT; } static const char *proc_pid_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct path path; int error = -EACCES; if (!dentry) return ERR_PTR(-ECHILD); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = nd_jump_link(&path); out: return ERR_PTR(error); } static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen) { char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); char *pathname; int len; if (!tmp) return -ENOMEM; pathname = d_path(path, tmp, PATH_MAX); len = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; len = tmp + PATH_MAX - 1 - pathname; if (len > buflen) len = buflen; if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: kfree(tmp); return len; } static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) { int error = -EACCES; struct inode *inode = d_inode(dentry); struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = do_proc_readlink(&path, buffer, buflen); path_put(&path); out: return error; } const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_pid_get_link, .setattr = proc_setattr, }; /* building an inode */ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid) { /* Depending on the state of dumpable compute who should own a * proc file for a task. */ const struct cred *cred; kuid_t uid; kgid_t gid; if (unlikely(task->flags & PF_KTHREAD)) { *ruid = GLOBAL_ROOT_UID; *rgid = GLOBAL_ROOT_GID; return; } /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); uid = cred->euid; gid = cred->egid; rcu_read_unlock(); /* * Before the /proc/pid/status file was created the only way to read * the effective uid of a /process was to stat /proc/pid. Reading * /proc/pid/status is slow enough that procps and other packages * kept stating /proc/pid. To keep the rules in /proc simple I have * made this apply to all per process world readable and executable * directories. */ if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { struct mm_struct *mm; task_lock(task); mm = task->mm; /* Make non-dumpable tasks owned by some root */ if (mm) { if (get_dumpable(mm) != SUID_DUMP_USER) { struct user_namespace *user_ns = mm->user_ns; uid = make_kuid(user_ns, 0); if (!uid_valid(uid)) uid = GLOBAL_ROOT_UID; gid = make_kgid(user_ns, 0); if (!gid_valid(gid)) gid = GLOBAL_ROOT_GID; } } else { uid = GLOBAL_ROOT_UID; gid = GLOBAL_ROOT_GID; } task_unlock(task); } *ruid = uid; *rgid = gid; } void proc_pid_evict_inode(struct proc_inode *ei) { struct pid *pid = ei->pid; if (S_ISDIR(ei->vfs_inode.i_mode)) { spin_lock(&pid->lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(&pid->lock); } } struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; struct pid *pid; /* We need a new inode */ inode = new_inode(sb); if (!inode) goto out; /* Common stuff */ ei = PROC_I(inode); inode->i_mode = mode; inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); inode->i_op = &proc_def_inode_operations; /* * grab the reference to task. */ pid = get_task_pid(task, PIDTYPE_PID); if (!pid) goto out_unlock; /* Let the pid remember us for quick removal */ ei->pid = pid; task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: return inode; out_unlock: iput(inode); return NULL; } /* * Generating an inode and adding it into @pid->inodes, so that task will * invalidate inode's dentry before being released. * * This helper is used for creating dir-type entries under '/proc' and * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' * can be released by invalidating '/proc/<tgid>' dentry. * In theory, dentries under '/proc/<tgid>/task' can also be released by * invalidating '/proc/<tgid>' dentry, we reserve it to handle single * thread exiting situation: Any one of threads should invalidate its * '/proc/<tgid>/task/<pid>' dentry before released. */ static struct inode *proc_pid_make_base_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode *inode; struct proc_inode *ei; struct pid *pid; inode = proc_pid_make_inode(sb, task, mode); if (!inode) return NULL; /* Let proc_flush_pid find this directory inode */ ei = PROC_I(inode); pid = ei->pid; spin_lock(&pid->lock); hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); spin_unlock(&pid->lock); return inode; } int pid_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, * it only makes getattr() consistent with readdir(). */ return -ENOENT; } task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; } /* dentry stuff */ /* * Set <pid>/... inode ownership (can change due to setuid(), etc.) */ void pid_update_inode(struct task_struct *task, struct inode *inode) { task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); } /* * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * */ static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; int ret = 0; rcu_read_lock(); inode = d_inode_rcu(dentry); if (!inode) goto out; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { pid_update_inode(task, inode); ret = 1; } out: rcu_read_unlock(); return ret; } static inline bool proc_inode_is_dead(struct inode *inode) { return !proc_pid(inode)->tasks[PIDTYPE_PID].first; } int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ return proc_inode_is_dead(d_inode(dentry)); } const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, }; /* Lookups */ /* * Fill a directory entry. * * If possible create the dcache entry and derive our inode number and * file type from dcache entry. * * Since all of the proc inode numbers are dynamically generated, the inode * numbers do not exist until the inode is cache. This means creating * the dcache entry in readdir is necessary to keep the inode numbers * reported by readdir in sync with the inode numbers reported * by stat. */ bool proc_fill_cache(struct file *file, struct dir_context *ctx, const char *name, unsigned int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = file->f_path.dentry; struct qstr qname = QSTR_INIT(name, len); struct inode *inode; unsigned type = DT_UNKNOWN; ino_t ino = 1; child = d_hash_and_lookup(dir, &qname); if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { struct dentry *res; res = instantiate(child, task, ptr); d_lookup_done(child); if (unlikely(res)) { dput(child); child = res; if (IS_ERR(child)) goto end_instantiate; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); end_instantiate: return dir_emit(ctx, name, len, ino, type); } /* * dname_to_vma_addr - maps a dentry name into two unsigned longs * which represent vma start and end addresses. */ static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { const char *str = dentry->d_name.name; unsigned long long sval, eval; unsigned int len; if (str[0] == '0' && str[1] != '-') return -EINVAL; len = _parse_integer(str, 16, &sval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (sval != (unsigned long)sval) return -EINVAL; str += len; if (*str != '-') return -EINVAL; str++; if (str[0] == '0' && str[1]) return -EINVAL; len = _parse_integer(str, 16, &eval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (eval != (unsigned long)eval) return -EINVAL; str += len; if (*str != '\0') return -EINVAL; *start = sval; *end = eval; return 0; } static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; struct inode *inode; int status = 0; if (flags & LOOKUP_RCU) return -ECHILD; inode = d_inode(dentry); task = get_proc_task(inode); if (!task) goto out_notask; mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { status = mmap_read_lock_killable(mm); if (!status) { exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); mmap_read_unlock(mm); } } mmput(mm); if (exact_vma_exists) { task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); status = 1; } out: put_task_struct(task); out_notask: return status; } static const struct dentry_operations tid_map_files_dentry_operations = { .d_revalidate = map_files_d_revalidate, .d_delete = pid_delete_dentry, }; static int map_files_get_link(struct dentry *dentry, struct path *path) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; int rc; rc = -ENOENT; task = get_proc_task(d_inode(dentry)); if (!task) goto out; mm = get_task_mm(task); put_task_struct(task); if (!mm) goto out; rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); if (rc) goto out_mmput; rc = mmap_read_lock_killable(mm); if (rc) goto out_mmput; rc = -ENOENT; vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = *file_user_path(vma->vm_file); path_get(path); rc = 0; } mmap_read_unlock(mm); out_mmput: mmput(mm); out: return rc; } struct map_files_info { unsigned long start; unsigned long end; fmode_t mode; }; /* * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due * to concerns about how the symlinks may be used to bypass permissions on * ancestor directories in the path to the file in question. */ static const char * proc_map_files_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { if (!checkpoint_restore_ns_capable(&init_user_ns)) return ERR_PTR(-EPERM); return proc_pid_get_link(dentry, inode, done); } /* * Identical to proc_pid_link_inode_operations except for get_link() */ static const struct inode_operations proc_map_files_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_map_files_get_link, .setattr = proc_setattr, }; static struct dentry * proc_map_files_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | ((mode & FMODE_READ ) ? S_IRUSR : 0) | ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); ei->op.proc_get_link = map_files_get_link; inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; return proc_splice_unmountable(inode, dentry, &tid_map_files_dentry_operations); } static struct dentry *proc_map_files_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct dentry *result; struct mm_struct *mm; result = ERR_PTR(-ENOENT); task = get_proc_task(dir); if (!task) goto out; result = ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; result = ERR_PTR(-EINTR); if (mmap_read_lock_killable(mm)) goto out_put_mm; result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; if (vma->vm_file) result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: put_task_struct(task); out: return result; } static const struct inode_operations proc_map_files_inode_operations = { .lookup = proc_map_files_lookup, .permission = proc_fd_permission, .setattr = proc_setattr, }; static int proc_map_files_readdir(struct file *file, struct dir_context *ctx) { struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; unsigned long nr_files, pos, i; GENRADIX(struct map_files_info) fa; struct map_files_info *p; int ret; struct vma_iterator vmi; genradix_init(&fa); ret = -ENOENT; task = get_proc_task(file_inode(file)); if (!task) goto out; ret = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; if (!dir_emit_dots(file, ctx)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); goto out_put_task; } nr_files = 0; /* * We need two passes here: * * 1) Collect vmas of mapped files with mmap_lock taken * 2) Release mmap_lock and instantiate entries * * otherwise we get lockdep complained, since filldir() * routine might require mmap_lock taken in might_fault(). */ pos = 2; vma_iter_init(&vmi, mm, 0); for_each_vma(vmi, vma) { if (!vma->vm_file) continue; if (++pos <= ctx->pos) continue; p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); if (!p) { ret = -ENOMEM; mmap_read_unlock(mm); mmput(mm); goto out_put_task; } p->start = vma->vm_start; p->end = vma->vm_end; p->mode = vma->vm_file->f_mode; } mmap_read_unlock(mm); mmput(mm); for (i = 0; i < nr_files; i++) { char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ unsigned int len; p = genradix_ptr(&fa, i); len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) break; ctx->pos++; } out_put_task: put_task_struct(task); out: genradix_free(&fa); return ret; } static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, .iterate_shared = proc_map_files_readdir, .llseek = generic_file_llseek, }; #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; struct sighand_struct *sighand; struct pid_namespace *ns; unsigned long flags; }; static void *timers_start(struct seq_file *m, loff_t *pos) { struct timers_private *tp = m->private; tp->task = get_pid_task(tp->pid, PIDTYPE_PID); if (!tp->task) return ERR_PTR(-ESRCH); tp->sighand = lock_task_sighand(tp->task, &tp->flags); if (!tp->sighand) return ERR_PTR(-ESRCH); return seq_list_start(&tp->task->signal->posix_timers, *pos); } static void *timers_next(struct seq_file *m, void *v, loff_t *pos) { struct timers_private *tp = m->private; return seq_list_next(v, &tp->task->signal->posix_timers, pos); } static void timers_stop(struct seq_file *m, void *v) { struct timers_private *tp = m->private; if (tp->sighand) { unlock_task_sighand(tp->task, &tp->flags); tp->sighand = NULL; } if (tp->task) { put_task_struct(tp->task); tp->task = NULL; } } static int show_timer(struct seq_file *m, void *v) { struct k_itimer *timer; struct timers_private *tp = m->private; int notify; static const char * const nstr[] = { [SIGEV_SIGNAL] = "signal", [SIGEV_NONE] = "none", [SIGEV_THREAD] = "thread", }; timer = list_entry((struct list_head *)v, struct k_itimer, list); notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); seq_printf(m, "signal: %d/%px\n", timer->sigq->info.si_signo, timer->sigq->info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID], (notify & SIGEV_THREAD_ID) ? "tid" : "pid", pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; } static const struct seq_operations proc_timers_seq_ops = { .start = timers_start, .next = timers_next, .stop = timers_stop, .show = show_timer, }; static int proc_timers_open(struct inode *inode, struct file *file) { struct timers_private *tp; tp = __seq_open_private(file, &proc_timers_seq_ops, sizeof(struct timers_private)); if (!tp) return -ENOMEM; tp->pid = proc_pid(inode); tp->ns = proc_pid_ns(inode->i_sb); return 0; } static const struct file_operations proc_timers_operations = { .open = proc_timers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; #endif static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; u64 slack_ns; int err; err = kstrtoull_from_user(buf, count, 10, &slack_ns); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); count = -EPERM; goto out; } rcu_read_unlock(); err = security_task_setscheduler(p); if (err) { count = err; goto out; } } task_lock(p); if (slack_ns == 0) p->timer_slack_ns = p->default_timer_slack_ns; else p->timer_slack_ns = slack_ns; task_unlock(p); out: put_task_struct(p); return count; } static int timerslack_ns_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; int err = 0; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; goto out; } rcu_read_unlock(); err = security_task_getscheduler(p); if (err) goto out; } task_lock(p); seq_printf(m, "%llu\n", p->timer_slack_ns); task_unlock(p); out: put_task_struct(p); return err; } static int timerslack_ns_open(struct inode *inode, struct file *filp) { return single_open(filp, timerslack_ns_show, inode); } static const struct file_operations proc_pid_set_timerslack_ns_operations = { .open = timerslack_ns_open, .read = seq_read, .write = timerslack_ns_write, .llseek = seq_lseek, .release = single_release, }; static struct dentry *proc_pident_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); if (S_ISDIR(inode->i_mode)) set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) inode->i_op = p->iop; if (p->fop) inode->i_fop = p->fop; ei->op = p->op; pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, const struct pid_entry *p, const struct pid_entry *end) { struct task_struct *task = get_proc_task(dir); struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ for (; p < end; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) { res = proc_pident_instantiate(dentry, task, p); break; } } put_task_struct(task); out_no_task: return res; } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, const struct pid_entry *ents, unsigned int nents) { struct task_struct *task = get_proc_task(file_inode(file)); const struct pid_entry *p; if (!task) return -ENOENT; if (!dir_emit_dots(file, ctx)) goto out; if (ctx->pos >= nents + 2) goto out; for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; ctx->pos++; } out: put_task_struct(task); return 0; } #ifdef CONFIG_SECURITY static int proc_pid_attr_open(struct inode *inode, struct file *file) { file->private_data = NULL; __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); return 0; } static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; length = security_getprocattr(task, PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, &p); put_task_struct(task); if (length > 0) length = simple_read_from_buffer(buf, count, ppos, p, length); kfree(p); return length; } static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task; void *page; int rv; /* A task may only write when it was the opener. */ if (file->private_data != current->mm) return -EPERM; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (!task) { rcu_read_unlock(); return -ESRCH; } /* A task may only write its own attributes. */ if (current != task) { rcu_read_unlock(); return -EACCES; } /* Prevent changes to overridden credentials. */ if (current_cred() != current_real_cred()) { rcu_read_unlock(); return -EBUSY; } rcu_read_unlock(); if (count > PAGE_SIZE) count = PAGE_SIZE; /* No partial writes. */ if (*ppos != 0) return -EINVAL; page = memdup_user(buf, count); if (IS_ERR(page)) { rv = PTR_ERR(page); goto out; } /* Guard against adverse ptrace interaction */ rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex); if (rv < 0) goto out_free; rv = security_setprocattr(PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, page, count); mutex_unlock(&current->signal->cred_guard_mutex); out_free: kfree(page); out: return rv; } static const struct file_operations proc_pid_attr_operations = { .open = proc_pid_attr_open, .read = proc_pid_attr_read, .write = proc_pid_attr_write, .llseek = generic_file_llseek, .release = mem_release, }; #define LSM_DIR_OPS(LSM) \ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ struct dir_context *ctx) \ { \ return proc_pident_readdir(filp, ctx, \ LSM##_attr_dir_stuff, \ ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct file_operations proc_##LSM##_attr_dir_ops = { \ .read = generic_read_dir, \ .iterate_shared = proc_##LSM##_attr_dir_iterate, \ .llseek = default_llseek, \ }; \ \ static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ struct dentry *dentry, unsigned int flags) \ { \ return proc_pident_lookup(dir, dentry, \ LSM##_attr_dir_stuff, \ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ .lookup = proc_##LSM##_attr_dir_lookup, \ .getattr = pid_getattr, \ .setattr = proc_setattr, \ } #ifdef CONFIG_SECURITY_SMACK static const struct pid_entry smack_attr_dir_stuff[] = { ATTR(LSM_ID_SMACK, "current", 0666), }; LSM_DIR_OPS(smack); #endif #ifdef CONFIG_SECURITY_APPARMOR static const struct pid_entry apparmor_attr_dir_stuff[] = { ATTR(LSM_ID_APPARMOR, "current", 0666), ATTR(LSM_ID_APPARMOR, "prev", 0444), ATTR(LSM_ID_APPARMOR, "exec", 0666), }; LSM_DIR_OPS(apparmor); #endif static const struct pid_entry attr_dir_stuff[] = { ATTR(LSM_ID_UNDEF, "current", 0666), ATTR(LSM_ID_UNDEF, "prev", 0444), ATTR(LSM_ID_UNDEF, "exec", 0666), ATTR(LSM_ID_UNDEF, "fscreate", 0666), ATTR(LSM_ID_UNDEF, "keycreate", 0666), ATTR(LSM_ID_UNDEF, "sockcreate", 0666), #ifdef CONFIG_SECURITY_SMACK DIR("smack", 0555, proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), #endif #ifdef CONFIG_SECURITY_APPARMOR DIR("apparmor", 0555, proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops), #endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, .iterate_shared = proc_attr_dir_readdir, .llseek = generic_file_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); } static const struct inode_operations proc_attr_dir_inode_operations = { .lookup = proc_attr_dir_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; #endif #ifdef CONFIG_ELF_CORE static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; char buffer[PROC_NUMBUF]; size_t len; int ret; if (!task) return -ESRCH; ret = 0; mm = get_task_mm(task); if (mm) { len = snprintf(buffer, sizeof(buffer), "%08lx\n", ((mm->flags & MMF_DUMP_FILTER_MASK) >> MMF_DUMP_FILTER_SHIFT)); mmput(mm); ret = simple_read_from_buffer(buf, count, ppos, buffer, len); } put_task_struct(task); return ret; } static ssize_t proc_coredump_filter_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; struct mm_struct *mm; unsigned int val; int ret; int i; unsigned long mask; ret = kstrtouint_from_user(buf, count, 0, &val); if (ret < 0) return ret; ret = -ESRCH; task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; mm = get_task_mm(task); if (!mm) goto out_no_mm; ret = 0; for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { if (val & mask) set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); else clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); } mmput(mm); out_no_mm: put_task_struct(task); out_no_task: if (ret < 0) return ret; return count; } static const struct file_operations proc_coredump_filter_operations = { .read = proc_coredump_filter_read, .write = proc_coredump_filter_write, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) { struct task_io_accounting acct; int result; result = down_read_killable(&task->signal->exec_update_lock); if (result) return result; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } if (whole) { struct signal_struct *sig = task->signal; struct task_struct *t; unsigned int seq = 1; unsigned long flags; rcu_read_lock(); do { seq++; /* 2 on the 1st/lockless path, otherwise odd */ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); acct = sig->ioac; __for_each_thread(sig, t) task_io_accounting_add(&acct, &t->ioac); } while (need_seqretry(&sig->stats_lock, seq)); done_seqretry_irqrestore(&sig->stats_lock, seq, flags); rcu_read_unlock(); } else { acct = task->ioac; } seq_printf(m, "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", (unsigned long long)acct.rchar, (unsigned long long)acct.wchar, (unsigned long long)acct.syscr, (unsigned long long)acct.syscw, (unsigned long long)acct.read_bytes, (unsigned long long)acct.write_bytes, (unsigned long long)acct.cancelled_write_bytes); result = 0; out_unlock: up_read(&task->signal->exec_update_lock); return result; } static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 0); } static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 1); } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; struct seq_file *seq; int ret = -EINVAL; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; ret = seq_open(file, seq_ops); if (ret) goto err_put_ns; seq = file->private_data; seq->private = ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_id_map_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; put_user_ns(ns); return seq_release(inode, file); } static int proc_uid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_uid_seq_operations); } static int proc_gid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_gid_seq_operations); } static int proc_projid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_projid_seq_operations); } static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_gid_map_operations = { .open = proc_gid_map_open, .write = proc_gid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_projid_map_operations = { .open = proc_projid_map_open, .write = proc_projid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static int proc_setgroups_open(struct inode *inode, struct file *file) { struct user_namespace *ns = NULL; struct task_struct *task; int ret; ret = -ESRCH; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; if (file->f_mode & FMODE_WRITE) { ret = -EACCES; if (!ns_capable(ns, CAP_SYS_ADMIN)) goto err_put_ns; } ret = single_open(file, &proc_setgroups_show, ns); if (ret) goto err_put_ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_setgroups_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; int ret = single_release(inode, file); put_user_ns(ns); return ret; } static const struct file_operations proc_setgroups_operations = { .open = proc_setgroups_open, .write = proc_setgroups_write, .read = seq_read, .llseek = seq_lseek, .release = proc_setgroups_release, }; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { int err = lock_trace(task); if (!err) { seq_printf(m, "%08x\n", task->personality); unlock_trace(task); } return err; } #ifdef CONFIG_LIVEPATCH static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { seq_printf(m, "%d\n", task->patch_state); return 0; } #endif /* CONFIG_LIVEPATCH */ #ifdef CONFIG_KSM static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; mm = get_task_mm(task); if (mm) { seq_printf(m, "%lu\n", mm->ksm_merging_pages); mmput(mm); } return 0; } static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; mm = get_task_mm(task); if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm)); seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages); seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm)); mmput(mm); } return 0; } #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long prev_depth = THREAD_SIZE - (task->prev_lowest_stack & (THREAD_SIZE - 1)); unsigned long depth = THREAD_SIZE - (task->lowest_stack & (THREAD_SIZE - 1)); seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n", prev_depth, depth); return 0; } #endif /* CONFIG_STACKLEAK_METRICS */ /* * Thread groups */ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif #ifdef CONFIG_SCHED_AUTOGROUP REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif #ifdef CONFIG_TIME_NS REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_ELF_CORE REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tgid_base_readdir, .llseek = generic_file_llseek, }; struct pid *tgid_pidfd_to_pid(const struct file *file) { if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); } static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tgid_base_stuff, tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); } static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; /** * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache. * @pid: pid that should be flushed. * * This function walks a list of inodes (that belong to any proc * filesystem) that are attached to the pid and flushes them from * the dentry cache. * * It is safe and reasonable to cache /proc entries for a task until * that task exits. After that they just clog up the dcache with * useless entries, possibly causing useful dcache entries to be * flushed instead. This routine is provided to flush those useless * dcache entries when a process is reaped. * * NOTE: This routine is just an optimization so it does not guarantee * that no dcache entries will exist after a process is reaped * it just makes it very unlikely that any will persist. */ void proc_flush_pid(struct pid *pid) { proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); } static struct dentry *proc_pid_instantiate(struct dentry * dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; set_nlink(inode, nlink_tgid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) { struct task_struct *task; unsigned tgid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; /* Limit procfs to only ptraceable tasks */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) { if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS)) goto out_put_task; } result = proc_pid_instantiate(dentry, task, NULL); out_put_task: put_task_struct(task); out: return result; } /* * Find the first task with tgid >= tgid * */ struct tgid_iter { unsigned int tgid; struct task_struct *task; }; static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) { struct pid *pid; if (iter.task) put_task_struct(iter.task); rcu_read_lock(); retry: iter.task = NULL; pid = find_ge_pid(iter.tgid, ns); if (pid) { iter.tgid = pid_nr_ns(pid, ns); iter.task = pid_task(pid, PIDTYPE_TGID); if (!iter.task) { iter.tgid += 1; goto retry; } get_task_struct(iter.task); } rcu_read_unlock(); return iter; } #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; if (pos == TGID_OFFSET - 2) { struct inode *inode = d_inode(fs_info->proc_self); if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { struct inode *inode = d_inode(fs_info->proc_thread_self); if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[10 + 1]; unsigned int len; cond_resched(); if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { put_task_struct(iter.task); return 0; } } ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; return 0; } /* * proc_tid_comm_permission is a special permission function exclusively * used for the node /proc/<pid>/task/<tid>/comm. * It bypasses generic permission checks in the case where a task of the same * task group attempts to access the node. * The rationale behind this is that glibc and bionic access this node for * cross thread naming (pthread_set/getname_np(!self)). However, if * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, * which locks out the cross thread naming implementation. * This function makes sure that the node is always accessible for members of * same thread group. */ static int proc_tid_comm_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { bool is_same_tgroup; struct task_struct *task; task = get_proc_task(inode); if (!task) return -ESRCH; is_same_tgroup = same_thread_group(current, task); put_task_struct(task); if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { /* This file (/proc/<pid>/task/<tid>/comm) can always be * read or written by the members of the corresponding * thread group. */ return 0; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_tid_comm_inode_operations = { .setattr = proc_setattr, .permission = proc_tid_comm_permission, }; /* * Tasks */ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, &proc_tid_comm_inode_operations, &proc_pid_set_comm_operations, {}), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tid_base_stuff, tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); } static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tid_base_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags |= S_IMMUTABLE; set_nlink(inode, nlink_tid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); if (!leader) goto out_no_task; tid = name_to_int(&dentry->d_name); if (tid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dentry, task, NULL); out_drop_task: put_task_struct(task); out: put_task_struct(leader); out_no_task: return result; } /* * Find the first tid of a thread group to return to user space. * * Usually this is just the thread group leader, but if the users * buffer was too small or there was a seek into the middle of the * directory we have more work todo. * * In the case of a short read we start with find_task_by_pid. * * In the case of a seek we start with the leader and walk nr * threads past it. */ static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, struct pid_namespace *ns) { struct task_struct *pos, *task; unsigned long nr = f_pos; if (nr != f_pos) /* 32bit overflow? */ return NULL; rcu_read_lock(); task = pid_task(pid, PIDTYPE_PID); if (!task) goto fail; /* Attempt to start with the tid of a thread */ if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ if (nr >= get_nr_threads(task)) goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ for_each_thread(task, pos) { if (!nr--) goto found; } fail: pos = NULL; goto out; found: get_task_struct(pos); out: rcu_read_unlock(); return pos; } /* * Find the next thread in the thread list. * Return NULL if there is an error or no next thread. * * The reference to the input task_struct is released. */ static struct task_struct *next_tid(struct task_struct *start) { struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { pos = __next_thread(start); if (pos) get_task_struct(pos); } rcu_read_unlock(); put_task_struct(start); return pos; } /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct task_struct *task; struct pid_namespace *ns; int tid; if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) return 0; /* We cache the tgid value that the last readdir call couldn't * return and lseek resets it to 0. */ ns = proc_pid_ns(inode->i_sb); tid = (int)(intptr_t)file->private_data; file->private_data = NULL; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[10 + 1]; unsigned int len; tid = task_pid_nr_ns(task, ns); if (!tid) continue; /* The task has just exited. */ len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ file->private_data = (void *)(intptr_t)tid; put_task_struct(task); break; } } return 0; } static int proc_task_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (p) { stat->nlink += get_nr_threads(p); put_task_struct(p); } return 0; } /* * proc_task_readdir() set @file->private_data to a positive integer * value, so casting that to u64 is safe. generic_llseek_cookie() will * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is * here to catch any unexpected change in behavior either in * proc_task_readdir() or generic_llseek_cookie(). */ static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) { u64 cookie = (u64)(intptr_t)file->private_data; loff_t off; off = generic_llseek_cookie(file, offset, whence, &cookie); WARN_ON_ONCE(cookie > INT_MAX); file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ return off; } static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, .llseek = proc_dir_llseek, }; void __init set_proc_pid_nlink(void) { nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); }
3 6 137 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 /* * Copyright (C) 2011-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef DRM_RECT_H #define DRM_RECT_H #include <linux/types.h> /** * DOC: rect utils * * Utility functions to help manage rectangular areas for * clipping, scaling, etc. calculations. */ /** * struct drm_rect - two dimensional rectangle * @x1: horizontal starting coordinate (inclusive) * @x2: horizontal ending coordinate (exclusive) * @y1: vertical starting coordinate (inclusive) * @y2: vertical ending coordinate (exclusive) * * Note that this must match the layout of struct drm_mode_rect or the damage * helpers like drm_atomic_helper_damage_iter_init() break. */ struct drm_rect { int x1, y1, x2, y2; }; /** * DRM_RECT_INIT - initialize a rectangle from x/y/w/h * @x: x coordinate * @y: y coordinate * @w: width * @h: height * * RETURNS: * A new rectangle of the specified size. */ #define DRM_RECT_INIT(x, y, w, h) ((struct drm_rect){ \ .x1 = (x), \ .y1 = (y), \ .x2 = (x) + (w), \ .y2 = (y) + (h) }) /** * DRM_RECT_FMT - printf string for &struct drm_rect */ #define DRM_RECT_FMT "%dx%d%+d%+d" /** * DRM_RECT_ARG - printf arguments for &struct drm_rect * @r: rectangle struct */ #define DRM_RECT_ARG(r) drm_rect_width(r), drm_rect_height(r), (r)->x1, (r)->y1 /** * DRM_RECT_FP_FMT - printf string for &struct drm_rect in 16.16 fixed point */ #define DRM_RECT_FP_FMT "%d.%06ux%d.%06u%+d.%06u%+d.%06u" /** * DRM_RECT_FP_ARG - printf arguments for &struct drm_rect in 16.16 fixed point * @r: rectangle struct * * This is useful for e.g. printing plane source rectangles, which are in 16.16 * fixed point. */ #define DRM_RECT_FP_ARG(r) \ drm_rect_width(r) >> 16, ((drm_rect_width(r) & 0xffff) * 15625) >> 10, \ drm_rect_height(r) >> 16, ((drm_rect_height(r) & 0xffff) * 15625) >> 10, \ (r)->x1 >> 16, (((r)->x1 & 0xffff) * 15625) >> 10, \ (r)->y1 >> 16, (((r)->y1 & 0xffff) * 15625) >> 10 /** * drm_rect_init - initialize the rectangle from x/y/w/h * @r: rectangle * @x: x coordinate * @y: y coordinate * @width: width * @height: height */ static inline void drm_rect_init(struct drm_rect *r, int x, int y, int width, int height) { r->x1 = x; r->y1 = y; r->x2 = x + width; r->y2 = y + height; } /** * drm_rect_adjust_size - adjust the size of the rectangle * @r: rectangle to be adjusted * @dw: horizontal adjustment * @dh: vertical adjustment * * Change the size of rectangle @r by @dw in the horizontal direction, * and by @dh in the vertical direction, while keeping the center * of @r stationary. * * Positive @dw and @dh increase the size, negative values decrease it. */ static inline void drm_rect_adjust_size(struct drm_rect *r, int dw, int dh) { r->x1 -= dw >> 1; r->y1 -= dh >> 1; r->x2 += (dw + 1) >> 1; r->y2 += (dh + 1) >> 1; } /** * drm_rect_translate - translate the rectangle * @r: rectangle to be translated * @dx: horizontal translation * @dy: vertical translation * * Move rectangle @r by @dx in the horizontal direction, * and by @dy in the vertical direction. */ static inline void drm_rect_translate(struct drm_rect *r, int dx, int dy) { r->x1 += dx; r->y1 += dy; r->x2 += dx; r->y2 += dy; } /** * drm_rect_translate_to - translate the rectangle to an absolute position * @r: rectangle to be translated * @x: horizontal position * @y: vertical position * * Move rectangle @r to @x in the horizontal direction, * and to @y in the vertical direction. */ static inline void drm_rect_translate_to(struct drm_rect *r, int x, int y) { drm_rect_translate(r, x - r->x1, y - r->y1); } /** * drm_rect_downscale - downscale a rectangle * @r: rectangle to be downscaled * @horz: horizontal downscale factor * @vert: vertical downscale factor * * Divide the coordinates of rectangle @r by @horz and @vert. */ static inline void drm_rect_downscale(struct drm_rect *r, int horz, int vert) { r->x1 /= horz; r->y1 /= vert; r->x2 /= horz; r->y2 /= vert; } /** * drm_rect_width - determine the rectangle width * @r: rectangle whose width is returned * * RETURNS: * The width of the rectangle. */ static inline int drm_rect_width(const struct drm_rect *r) { return r->x2 - r->x1; } /** * drm_rect_height - determine the rectangle height * @r: rectangle whose height is returned * * RETURNS: * The height of the rectangle. */ static inline int drm_rect_height(const struct drm_rect *r) { return r->y2 - r->y1; } /** * drm_rect_visible - determine if the rectangle is visible * @r: rectangle whose visibility is returned * * RETURNS: * %true if the rectangle is visible, %false otherwise. */ static inline bool drm_rect_visible(const struct drm_rect *r) { return drm_rect_width(r) > 0 && drm_rect_height(r) > 0; } /** * drm_rect_equals - determine if two rectangles are equal * @r1: first rectangle * @r2: second rectangle * * RETURNS: * %true if the rectangles are equal, %false otherwise. */ static inline bool drm_rect_equals(const struct drm_rect *r1, const struct drm_rect *r2) { return r1->x1 == r2->x1 && r1->x2 == r2->x2 && r1->y1 == r2->y1 && r1->y2 == r2->y2; } /** * drm_rect_fp_to_int - Convert a rect in 16.16 fixed point form to int form. * @dst: rect to be stored the converted value * @src: rect in 16.16 fixed point form */ static inline void drm_rect_fp_to_int(struct drm_rect *dst, const struct drm_rect *src) { drm_rect_init(dst, src->x1 >> 16, src->y1 >> 16, drm_rect_width(src) >> 16, drm_rect_height(src) >> 16); } bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, const struct drm_rect *clip); int drm_rect_calc_hscale(const struct drm_rect *src, const struct drm_rect *dst, int min_hscale, int max_hscale); int drm_rect_calc_vscale(const struct drm_rect *src, const struct drm_rect *dst, int min_vscale, int max_vscale); void drm_rect_debug_print(const char *prefix, const struct drm_rect *r, bool fixed_point); void drm_rect_rotate(struct drm_rect *r, int width, int height, unsigned int rotation); void drm_rect_rotate_inv(struct drm_rect *r, int width, int height, unsigned int rotation); #endif
11 2 9 11 11 11 11 11 11 11 11 3 8 1 6 7 7 8 8 7 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 /* * llc_sap.c - driver routines for SAP component. * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <net/llc.h> #include <net/llc_if.h> #include <net/llc_conn.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_s_st.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/llc.h> #include <linux/slab.h> static int llc_mac_header_len(unsigned short devtype) { switch (devtype) { case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); } return 0; } /** * llc_alloc_frame - allocates sk_buff for frame * @sk: socket to allocate frame to * @dev: network device this skb will be sent over * @type: pdu type to allocate * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, u8 type, u32 data_size) { int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; struct sk_buff *skb; hlen += llc_mac_header_len(dev->type); skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); skb->dev = dev; if (sk != NULL) skb_set_owner_w(skb, sk); } return skb; } void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; addr->sllc_ua = prim == LLC_DATAUNIT_PRIM; llc_pdu_decode_sa(skb, addr->sllc_mac); llc_pdu_decode_ssap(skb, &addr->sllc_sap); } /** * llc_sap_rtn_pdu - Informs upper layer on rx of an UI, XID or TEST pdu. * @sap: pointer to SAP * @skb: received pdu */ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); switch (LLC_U_PDU_RSP(pdu)) { case LLC_1_PDU_CMD_TEST: ev->prim = LLC_TEST_PRIM; break; case LLC_1_PDU_CMD_XID: ev->prim = LLC_XID_PRIM; break; case LLC_1_PDU_CMD_UI: ev->prim = LLC_DATAUNIT_PRIM; break; } ev->ind_cfm_flag = LLC_IND; } /** * llc_find_sap_trans - finds transition for event * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event. * Returns the pointer to found transition on success or %NULL for * failure. */ static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, struct sk_buff *skb) { int i = 0; const struct llc_sap_state_trans *rc = NULL; const struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until * its obvious the event is not valid for the current state */ for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) if (!next_trans[i]->ev(sap, skb)) { rc = next_trans[i]; /* got event match; return it */ break; } return rc; } /** * llc_exec_sap_trans_actions - execute actions related to event * @sap: pointer to SAP * @trans: pointer to transition that it's actions must be performed * @skb: happened event. * * This function executes actions that is related to happened event. * Returns 0 for success and 1 for failure of at least one action. */ static int llc_exec_sap_trans_actions(struct llc_sap *sap, const struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; const llc_sap_action_t *next_action = trans->ev_actions; for (; next_action && *next_action; next_action++) if ((*next_action)(sap, skb)) rc = 1; return rc; } /** * llc_sap_next_state - finds transition, execs actions & change SAP state * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event, then * executes related actions and finally changes state of SAP. It returns * 0 on success and 1 for failure. */ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { const struct llc_sap_state_trans *trans; int rc = 1; if (sap->state > LLC_NR_SAP_STATES) goto out; trans = llc_find_sap_trans(sap, skb); if (!trans) goto out; /* * Got the state to which we next transition; perform the actions * associated with this transition before actually transitioning to the * next state */ rc = llc_exec_sap_trans_actions(sap, trans, skb); if (rc) goto out; /* * Transition SAP to next state if all actions execute successfully */ sap->state = trans->next_state; out: return rc; } /** * llc_sap_state_process - sends event to SAP state machine * @sap: sap to use * @skb: pointer to occurred event * * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. * * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb) == 0) return; } kfree_skb(skb); } /** * llc_build_and_send_test_pkt - TEST interface for upper layers. * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a TEST pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_TEST_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_build_and_send_xid_pkt - XID interface for upper layers * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a XID pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_XID_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_sap_rcv - sends received pdus to the sap state machine * @sap: current sap component structure. * @skb: received frame. * @sk: socket to associate to frame * * Sends received pdus to the sap state machine. */ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; skb_orphan(skb); sock_hold(sk); skb->sk = sk; skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } static inline bool llc_dgram_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sock *sk, const struct net *net) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && net_eq(sock_net(sk), net) && llc->laddr.lsap == laddr->lsap && ether_addr_equal(llc->laddr.mac, laddr->mac); } /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @net: netns to look up a socket in * * Search socket list of the SAP and finds connection using the local * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr, const struct net *net) { struct sock *rc; struct hlist_nulls_node *node; int slot = llc_sk_laddr_hashfn(sap, laddr); struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc, net)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc, net))) { sock_put(rc); continue; } goto found; } } rc = NULL; /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (unlikely(get_nulls_value(node) != slot)) goto again; found: rcu_read_unlock_bh(); return rc; } static inline bool llc_mcast_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sk_buff *skb, const struct sock *sk) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && llc->laddr.lsap == laddr->lsap && llc->dev == skb->dev; } static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, struct sock **stack, int count) { struct sk_buff *skb1; int i; for (i = 0; i < count; i++) { skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) { sock_put(stack[i]); continue; } llc_sap_rcv(sap, skb1, stack[i]); sock_put(stack[i]); } } /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @skb: PDU to deliver * * Search socket list of the SAP and finds connections with same sap. * Deliver clone to each. */ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { int i = 0; struct sock *sk; struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; sock_hold(sk); if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); i = 0; } } spin_unlock_bh(&sap->sk_lock); llc_do_mcast(sap, skb, stack, i); } void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr laddr; llc_pdu_decode_da(skb, laddr.mac); llc_pdu_decode_dsap(skb, &laddr.lsap); if (is_multicast_ether_addr(laddr.mac)) { llc_sap_mcast(sap, &laddr, skb); kfree_skb(skb); } else { struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev)); if (sk) { llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); } }
23 26 7 39 6 98 52 17 4 6 16 11 2 10 10 6 99 102 50 101 5 97 24 95 45 32 32 1 31 29 30 10 7 13 30 30 1 11 18 29 29 31 18 18 18 9 9 18 18 12 4 8 8 8 12 10 7 7 7 2 3 6 6 4 3 7 7 7 5 2 7 7 8 8 1 7 98 49 24 36 59 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/minix/dir.c * * Copyright (C) 1991, 1992 Linus Torvalds * * minix directory handling functions * * Updated to filesystem version 3 by Daniel Aragones */ #include "minix.h" #include <linux/buffer_head.h> #include <linux/highmem.h> #include <linux/swap.h> typedef struct minix_dir_entry minix_dirent; typedef struct minix3_dir_entry minix3_dirent; static int minix_readdir(struct file *, struct dir_context *); const struct file_operations minix_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = minix_readdir, .fsync = generic_file_fsync, }; /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. */ static unsigned minix_last_byte(struct inode *inode, unsigned long page_nr) { unsigned last_byte = PAGE_SIZE; if (page_nr == (inode->i_size >> PAGE_SHIFT)) last_byte = inode->i_size & (PAGE_SIZE - 1); return last_byte; } static void dir_commit_chunk(struct folio *folio, loff_t pos, unsigned len) { struct address_space *mapping = folio->mapping; struct inode *dir = mapping->host; block_write_end(NULL, mapping, pos, len, len, folio, NULL); if (pos+len > dir->i_size) { i_size_write(dir, pos+len); mark_inode_dirty(dir); } folio_unlock(folio); } static int minix_handle_dirsync(struct inode *dir) { int err; err = filemap_write_and_wait(dir->i_mapping); if (!err) err = sync_inode_metadata(dir, 1); return err; } static void *dir_get_folio(struct inode *dir, unsigned long n, struct folio **foliop) { struct folio *folio = read_mapping_folio(dir->i_mapping, n, NULL); if (IS_ERR(folio)) return ERR_CAST(folio); *foliop = folio; return kmap_local_folio(folio, 0); } static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) { return (void*)((char*)de + sbi->s_dirsize); } static int minix_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct minix_sb_info *sbi = minix_sb(sb); unsigned chunk_size = sbi->s_dirsize; unsigned long npages = dir_pages(inode); unsigned long pos = ctx->pos; unsigned offset; unsigned long n; ctx->pos = pos = ALIGN(pos, chunk_size); if (pos >= inode->i_size) return 0; offset = pos & ~PAGE_MASK; n = pos >> PAGE_SHIFT; for ( ; n < npages; n++, offset = 0) { char *p, *kaddr, *limit; struct folio *folio; kaddr = dir_get_folio(inode, n, &folio); if (IS_ERR(kaddr)) continue; p = kaddr+offset; limit = kaddr + minix_last_byte(inode, n) - chunk_size; for ( ; p <= limit; p = minix_next_entry(p, sbi)) { const char *name; __u32 inumber; if (sbi->s_version == MINIX_V3) { minix3_dirent *de3 = (minix3_dirent *)p; name = de3->name; inumber = de3->inode; } else { minix_dirent *de = (minix_dirent *)p; name = de->name; inumber = de->inode; } if (inumber) { unsigned l = strnlen(name, sbi->s_namelen); if (!dir_emit(ctx, name, l, inumber, DT_UNKNOWN)) { folio_release_kmap(folio, p); return 0; } } ctx->pos += chunk_size; } folio_release_kmap(folio, kaddr); } return 0; } static inline int namecompare(int len, int maxlen, const char * name, const char * buffer) { if (len < maxlen && buffer[len]) return 0; return !memcmp(name, buffer, len); } /* * minix_find_entry() * * finds an entry in the specified directory with the wanted name. * It does NOT read the inode of the * entry - you'll have to do that yourself if you want to. * * On Success folio_release_kmap() should be called on *foliop. */ minix_dirent *minix_find_entry(struct dentry *dentry, struct folio **foliop) { const char * name = dentry->d_name.name; int namelen = dentry->d_name.len; struct inode * dir = d_inode(dentry->d_parent); struct super_block * sb = dir->i_sb; struct minix_sb_info * sbi = minix_sb(sb); unsigned long n; unsigned long npages = dir_pages(dir); char *p; char *namx; __u32 inumber; for (n = 0; n < npages; n++) { char *kaddr, *limit; kaddr = dir_get_folio(dir, n, foliop); if (IS_ERR(kaddr)) continue; limit = kaddr + minix_last_byte(dir, n) - sbi->s_dirsize; for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) { if (sbi->s_version == MINIX_V3) { minix3_dirent *de3 = (minix3_dirent *)p; namx = de3->name; inumber = de3->inode; } else { minix_dirent *de = (minix_dirent *)p; namx = de->name; inumber = de->inode; } if (!inumber) continue; if (namecompare(namelen, sbi->s_namelen, name, namx)) goto found; } folio_release_kmap(*foliop, kaddr); } return NULL; found: return (minix_dirent *)p; } int minix_add_link(struct dentry *dentry, struct inode *inode) { struct inode *dir = d_inode(dentry->d_parent); const char * name = dentry->d_name.name; int namelen = dentry->d_name.len; struct super_block * sb = dir->i_sb; struct minix_sb_info * sbi = minix_sb(sb); struct folio *folio = NULL; unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr, *p; minix_dirent *de; minix3_dirent *de3; loff_t pos; int err; char *namx = NULL; __u32 inumber; /* * We take care of directory expansion in the same loop * This code plays outside i_size, so it locks the page * to protect that region. */ for (n = 0; n <= npages; n++) { char *limit, *dir_end; kaddr = dir_get_folio(dir, n, &folio); if (IS_ERR(kaddr)) return PTR_ERR(kaddr); folio_lock(folio); dir_end = kaddr + minix_last_byte(dir, n); limit = kaddr + PAGE_SIZE - sbi->s_dirsize; for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) { de = (minix_dirent *)p; de3 = (minix3_dirent *)p; if (sbi->s_version == MINIX_V3) { namx = de3->name; inumber = de3->inode; } else { namx = de->name; inumber = de->inode; } if (p == dir_end) { /* We hit i_size */ if (sbi->s_version == MINIX_V3) de3->inode = 0; else de->inode = 0; goto got_it; } if (!inumber) goto got_it; err = -EEXIST; if (namecompare(namelen, sbi->s_namelen, name, namx)) goto out_unlock; } folio_unlock(folio); folio_release_kmap(folio, kaddr); } BUG(); return -EINVAL; got_it: pos = folio_pos(folio) + offset_in_folio(folio, p); err = minix_prepare_chunk(folio, pos, sbi->s_dirsize); if (err) goto out_unlock; memcpy (namx, name, namelen); if (sbi->s_version == MINIX_V3) { memset (namx + namelen, 0, sbi->s_dirsize - namelen - 4); de3->inode = inode->i_ino; } else { memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2); de->inode = inode->i_ino; } dir_commit_chunk(folio, pos, sbi->s_dirsize); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); mark_inode_dirty(dir); err = minix_handle_dirsync(dir); out_put: folio_release_kmap(folio, kaddr); return err; out_unlock: folio_unlock(folio); goto out_put; } int minix_delete_entry(struct minix_dir_entry *de, struct folio *folio) { struct inode *inode = folio->mapping->host; loff_t pos = folio_pos(folio) + offset_in_folio(folio, de); struct minix_sb_info *sbi = minix_sb(inode->i_sb); unsigned len = sbi->s_dirsize; int err; folio_lock(folio); err = minix_prepare_chunk(folio, pos, len); if (err) { folio_unlock(folio); return err; } if (sbi->s_version == MINIX_V3) ((minix3_dirent *)de)->inode = 0; else de->inode = 0; dir_commit_chunk(folio, pos, len); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); return minix_handle_dirsync(inode); } int minix_make_empty(struct inode *inode, struct inode *dir) { struct folio *folio = filemap_grab_folio(inode->i_mapping, 0); struct minix_sb_info *sbi = minix_sb(inode->i_sb); char *kaddr; int err; if (IS_ERR(folio)) return PTR_ERR(folio); err = minix_prepare_chunk(folio, 0, 2 * sbi->s_dirsize); if (err) { folio_unlock(folio); goto fail; } kaddr = kmap_local_folio(folio, 0); memset(kaddr, 0, folio_size(folio)); if (sbi->s_version == MINIX_V3) { minix3_dirent *de3 = (minix3_dirent *)kaddr; de3->inode = inode->i_ino; strcpy(de3->name, "."); de3 = minix_next_entry(de3, sbi); de3->inode = dir->i_ino; strcpy(de3->name, ".."); } else { minix_dirent *de = (minix_dirent *)kaddr; de->inode = inode->i_ino; strcpy(de->name, "."); de = minix_next_entry(de, sbi); de->inode = dir->i_ino; strcpy(de->name, ".."); } kunmap_local(kaddr); dir_commit_chunk(folio, 0, 2 * sbi->s_dirsize); err = minix_handle_dirsync(inode); fail: folio_put(folio); return err; } /* * routine to check that the specified directory is empty (for rmdir) */ int minix_empty_dir(struct inode * inode) { struct folio *folio = NULL; unsigned long i, npages = dir_pages(inode); struct minix_sb_info *sbi = minix_sb(inode->i_sb); char *name, *kaddr; __u32 inumber; for (i = 0; i < npages; i++) { char *p, *limit; kaddr = dir_get_folio(inode, i, &folio); if (IS_ERR(kaddr)) continue; limit = kaddr + minix_last_byte(inode, i) - sbi->s_dirsize; for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) { if (sbi->s_version == MINIX_V3) { minix3_dirent *de3 = (minix3_dirent *)p; name = de3->name; inumber = de3->inode; } else { minix_dirent *de = (minix_dirent *)p; name = de->name; inumber = de->inode; } if (inumber != 0) { /* check for . and .. */ if (name[0] != '.') goto not_empty; if (!name[1]) { if (inumber != inode->i_ino) goto not_empty; } else if (name[1] != '.') goto not_empty; else if (name[2]) goto not_empty; } } folio_release_kmap(folio, kaddr); } return 1; not_empty: folio_release_kmap(folio, kaddr); return 0; } /* Releases the page */ int minix_set_link(struct minix_dir_entry *de, struct folio *folio, struct inode *inode) { struct inode *dir = folio->mapping->host; struct minix_sb_info *sbi = minix_sb(dir->i_sb); loff_t pos = folio_pos(folio) + offset_in_folio(folio, de); int err; folio_lock(folio); err = minix_prepare_chunk(folio, pos, sbi->s_dirsize); if (err) { folio_unlock(folio); return err; } if (sbi->s_version == MINIX_V3) ((minix3_dirent *)de)->inode = inode->i_ino; else de->inode = inode->i_ino; dir_commit_chunk(folio, pos, sbi->s_dirsize); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); mark_inode_dirty(dir); return minix_handle_dirsync(dir); } struct minix_dir_entry *minix_dotdot(struct inode *dir, struct folio **foliop) { struct minix_sb_info *sbi = minix_sb(dir->i_sb); struct minix_dir_entry *de = dir_get_folio(dir, 0, foliop); if (!IS_ERR(de)) return minix_next_entry(de, sbi); return NULL; } ino_t minix_inode_by_name(struct dentry *dentry) { struct folio *folio; struct minix_dir_entry *de = minix_find_entry(dentry, &folio); ino_t res = 0; if (de) { struct inode *inode = folio->mapping->host; struct minix_sb_info *sbi = minix_sb(inode->i_sb); if (sbi->s_version == MINIX_V3) res = ((minix3_dirent *) de)->inode; else res = de->inode; folio_release_kmap(folio, de); } return res; }
2 1 1 1 2 2 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 // SPDX-License-Identifier: GPL-2.0-or-later /* * SQ905 subdriver * * Copyright (C) 2008, 2009 Adam Baker and Theodore Kilgore */ /* * History and Acknowledgments * * The original Linux driver for SQ905 based cameras was written by * Marcell Lengyel and further developed by many other contributors * and is available from http://sourceforge.net/projects/sqcam/ * * This driver takes advantage of the reverse engineering work done for * that driver and for libgphoto2 but shares no code with them. * * This driver has used as a base the finepix driver and other gspca * based drivers and may still contain code fragments taken from those * drivers. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sq905" #include <linux/workqueue.h> #include <linux/slab.h> #include "gspca.h" MODULE_AUTHOR("Adam Baker <linux@baker-net.org.uk>, Theodore Kilgore <kilgota@auburn.edu>"); MODULE_DESCRIPTION("GSPCA/SQ905 USB Camera Driver"); MODULE_LICENSE("GPL"); /* Default timeouts, in ms */ #define SQ905_CMD_TIMEOUT 500 #define SQ905_DATA_TIMEOUT 1000 /* Maximum transfer size to use. */ #define SQ905_MAX_TRANSFER 0x8000 #define FRAME_HEADER_LEN 64 /* The known modes, or registers. These go in the "value" slot. */ /* 00 is "none" obviously */ #define SQ905_BULK_READ 0x03 /* precedes any bulk read */ #define SQ905_COMMAND 0x06 /* precedes the command codes below */ #define SQ905_PING 0x07 /* when reading an "idling" command */ #define SQ905_READ_DONE 0xc0 /* ack bulk read completed */ /* Any non-zero value in the bottom 2 bits of the 2nd byte of * the ID appears to indicate the camera can do 640*480. If the * LSB of that byte is set the image is just upside down, otherwise * it is rotated 180 degrees. */ #define SQ905_HIRES_MASK 0x00000300 #define SQ905_ORIENTATION_MASK 0x00000100 /* Some command codes. These go in the "index" slot. */ #define SQ905_ID 0xf0 /* asks for model string */ #define SQ905_CONFIG 0x20 /* gets photo alloc. table, not used here */ #define SQ905_DATA 0x30 /* accesses photo data, not used here */ #define SQ905_CLEAR 0xa0 /* clear everything */ #define SQ905_CAPTURE_LOW 0x60 /* Starts capture at 160x120 */ #define SQ905_CAPTURE_MED 0x61 /* Starts capture at 320x240 */ #define SQ905_CAPTURE_HIGH 0x62 /* Starts capture at 640x480 (some cams only) */ /* note that the capture command also controls the output dimensions */ /* Structure to hold all of our device specific stuff */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ /* * Driver stuff */ struct work_struct work_struct; struct workqueue_struct *work_thread; }; static struct v4l2_pix_format sq905_mode[] = { { 160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, { 320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, { 640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0} }; /* * Send a command to the camera. */ static int sq905_command(struct gspca_dev *gspca_dev, u16 index) { int ret; gspca_dev->usb_buf[0] = '\0'; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, SQ905_COMMAND, index, gspca_dev->usb_buf, 1, SQ905_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, SQ905_PING, 0, gspca_dev->usb_buf, 1, SQ905_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed 2 (%d)\n", __func__, ret); return ret; } return 0; } /* * Acknowledge the end of a frame - see warning on sq905_command. */ static int sq905_ack_frame(struct gspca_dev *gspca_dev) { int ret; gspca_dev->usb_buf[0] = '\0'; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, SQ905_READ_DONE, 0, gspca_dev->usb_buf, 1, SQ905_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } /* * request and read a block of data - see warning on sq905_command. */ static int sq905_read_data(struct gspca_dev *gspca_dev, u8 *data, int size, int need_lock) { int ret; int act_len = 0; gspca_dev->usb_buf[0] = '\0'; if (need_lock) mutex_lock(&gspca_dev->usb_lock); ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, SQ905_BULK_READ, size, gspca_dev->usb_buf, 1, SQ905_CMD_TIMEOUT); if (need_lock) mutex_unlock(&gspca_dev->usb_lock); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), data, size, &act_len, SQ905_DATA_TIMEOUT); /* successful, it returns 0, otherwise negative */ if (ret < 0 || act_len != size) { pr_err("bulk read fail (%d) len %d/%d\n", ret, act_len, size); return -EIO; } return 0; } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the camera doesn't provide any controls. */ static void sq905_dostream(struct work_struct *work) { struct sd *dev = container_of(work, struct sd, work_struct); struct gspca_dev *gspca_dev = &dev->gspca_dev; int bytes_left; /* bytes remaining in current frame. */ int data_len; /* size to use for the next read. */ int header_read; /* true if we have already read the frame header. */ int packet_type; int frame_sz; int ret; u8 *data; u8 *buffer; buffer = kmalloc(SQ905_MAX_TRANSFER, GFP_KERNEL); if (!buffer) { pr_err("Couldn't allocate USB buffer\n"); goto quit_stream; } frame_sz = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].sizeimage + FRAME_HEADER_LEN; while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif /* request some data and then read it until we have * a complete frame. */ bytes_left = frame_sz; header_read = 0; /* Note we do not check for gspca_dev->streaming here, as we must finish reading an entire frame, otherwise the next time we stream we start reading in the middle of a frame. */ while (bytes_left > 0 && gspca_dev->present) { data_len = bytes_left > SQ905_MAX_TRANSFER ? SQ905_MAX_TRANSFER : bytes_left; ret = sq905_read_data(gspca_dev, buffer, data_len, 1); if (ret < 0) goto quit_stream; gspca_dbg(gspca_dev, D_PACK, "Got %d bytes out of %d for frame\n", data_len, bytes_left); bytes_left -= data_len; data = buffer; if (!header_read) { packet_type = FIRST_PACKET; /* The first 64 bytes of each frame are * a header full of FF 00 bytes */ data += FRAME_HEADER_LEN; data_len -= FRAME_HEADER_LEN; header_read = 1; } else if (bytes_left == 0) { packet_type = LAST_PACKET; } else { packet_type = INTER_PACKET; } gspca_frame_add(gspca_dev, packet_type, data, data_len); /* If entire frame fits in one packet we still need to add a LAST_PACKET */ if (packet_type == FIRST_PACKET && bytes_left == 0) gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); } if (gspca_dev->present) { /* acknowledge the frame */ mutex_lock(&gspca_dev->usb_lock); ret = sq905_ack_frame(gspca_dev); mutex_unlock(&gspca_dev->usb_lock); if (ret < 0) goto quit_stream; } } quit_stream: if (gspca_dev->present) { mutex_lock(&gspca_dev->usb_lock); sq905_command(gspca_dev, SQ905_CLEAR); mutex_unlock(&gspca_dev->usb_lock); } kfree(buffer); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam = &gspca_dev->cam; struct sd *dev = (struct sd *) gspca_dev; /* We don't use the buffer gspca allocates so make it small. */ cam->bulk = 1; cam->bulk_size = 64; INIT_WORK(&dev->work_struct, sq905_dostream); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); /* This waits for sq905_dostream to finish */ destroy_workqueue(dev->work_thread); dev->work_thread = NULL; mutex_lock(&gspca_dev->usb_lock); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { u32 ident; int ret; /* connect to the camera and read * the model ID and process that and put it away. */ ret = sq905_command(gspca_dev, SQ905_CLEAR); if (ret < 0) return ret; ret = sq905_command(gspca_dev, SQ905_ID); if (ret < 0) return ret; ret = sq905_read_data(gspca_dev, gspca_dev->usb_buf, 4, 0); if (ret < 0) return ret; /* usb_buf is allocated with kmalloc so is aligned. * Camera model number is the right way round if we assume this * reverse engineered ID is supposed to be big endian. */ ident = be32_to_cpup((__be32 *)gspca_dev->usb_buf); ret = sq905_command(gspca_dev, SQ905_CLEAR); if (ret < 0) return ret; gspca_dbg(gspca_dev, D_CONF, "SQ905 camera ID %08x detected\n", ident); gspca_dev->cam.cam_mode = sq905_mode; gspca_dev->cam.nmodes = ARRAY_SIZE(sq905_mode); if (!(ident & SQ905_HIRES_MASK)) gspca_dev->cam.nmodes--; if (ident & SQ905_ORIENTATION_MASK) gspca_dev->cam.input_flags = V4L2_IN_ST_VFLIP; else gspca_dev->cam.input_flags = V4L2_IN_ST_VFLIP | V4L2_IN_ST_HFLIP; return 0; } /* Set up for getting frames. */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; int ret; /* "Open the shutter" and set size, to start capture */ switch (gspca_dev->curr_mode) { default: /* case 2: */ gspca_dbg(gspca_dev, D_STREAM, "Start streaming at high resolution\n"); ret = sq905_command(&dev->gspca_dev, SQ905_CAPTURE_HIGH); break; case 1: gspca_dbg(gspca_dev, D_STREAM, "Start streaming at medium resolution\n"); ret = sq905_command(&dev->gspca_dev, SQ905_CAPTURE_MED); break; case 0: gspca_dbg(gspca_dev, D_STREAM, "Start streaming at low resolution\n"); ret = sq905_command(&dev->gspca_dev, SQ905_CAPTURE_LOW); } if (ret < 0) { gspca_err(gspca_dev, "Start streaming command failed\n"); return ret; } /* Start the workqueue function to do the streaming */ dev->work_thread = create_singlethread_workqueue(MODULE_NAME); if (!dev->work_thread) return -ENOMEM; queue_work(dev->work_thread, &dev->work_struct); return 0; } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x2770, 0x9120)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
19 21 34 42 34 22 20 20 19 20 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* * llc_core.c - Minimum needed routines for sap handling and module init/exit * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/init.h> #include <net/net_namespace.h> #include <net/llc.h> LIST_HEAD(llc_sap_list); static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. * * Allocates and initializes sap. */ static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); int i; if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; spin_lock_init(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); refcount_set(&sap->refcnt, 1); } return sap; } static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) goto out; sap = NULL; out: return sap; } /** * llc_sap_find - searches a SAP in station * @sap_value: sap to be found * * Searches for a sap in the sap list of the LLC's station upon the sap ID. * If the sap is found it will be refcounted and the user will have to do * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); if (!sap || !llc_sap_hold_safe(sap)) sap = NULL; rcu_read_unlock_bh(); return sap; } /** * llc_sap_open - open interface to the upper layers. * @lsap: SAP number. * @func: rcv func for datalink protos * * Interface function to upper layer. Each one who wants to get a SAP * (for example NetBEUI) should call this function. Returns the opened * SAP for success, NULL for failure. */ struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)) { struct llc_sap *sap = NULL; spin_lock_bh(&llc_sap_list_lock); if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; list_add_tail_rcu(&sap->node, &llc_sap_list); out: spin_unlock_bh(&llc_sap_list_lock); return sap; } /** * llc_sap_close - close interface for upper layers. * @sap: SAP to be closed. * * Close interface function to upper layer. Each one who wants to * close an open SAP (for example NetBEUI) should call this function. * Removes this sap from the list of saps in the station and then * frees the memory for this sap. */ void llc_sap_close(struct llc_sap *sap) { WARN_ON(sap->sk_count); spin_lock_bh(&llc_sap_list_lock); list_del_rcu(&sap->node); spin_unlock_bh(&llc_sap_list_lock); kfree_rcu(sap, rcu); } static struct packet_type llc_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_2), .func = llc_rcv, }; static int __init llc_init(void) { dev_add_pack(&llc_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); } module_init(llc_init); module_exit(llc_exit); EXPORT_SYMBOL(llc_sap_list); EXPORT_SYMBOL(llc_sap_find); EXPORT_SYMBOL(llc_sap_open); EXPORT_SYMBOL(llc_sap_close); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); MODULE_DESCRIPTION("LLC IEEE 802.2 core support");
41 42 6 14 8 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2012 Fusion-io All rights reserved. * Copyright (C) 2012 Intel Corp. All rights reserved. */ #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/raid/pq.h> #include <linux/hash.h> #include <linux/list_sort.h> #include <linux/raid/xor.h> #include <linux/mm.h> #include "messages.h" #include "ctree.h" #include "disk-io.h" #include "volumes.h" #include "raid56.h" #include "async-thread.h" #include "file-item.h" #include "btrfs_inode.h" /* set when additional merges to this rbio are not allowed */ #define RBIO_RMW_LOCKED_BIT 1 /* * set when this rbio is sitting in the hash, but it is just a cache * of past RMW */ #define RBIO_CACHE_BIT 2 /* * set when it is safe to trust the stripe_pages for caching */ #define RBIO_CACHE_READY_BIT 3 #define RBIO_CACHE_SIZE 1024 #define BTRFS_STRIPE_HASH_TABLE_BITS 11 static void dump_bioc(const struct btrfs_fs_info *fs_info, const struct btrfs_io_context *bioc) { if (unlikely(!bioc)) { btrfs_crit(fs_info, "bioc=NULL"); return; } btrfs_crit(fs_info, "bioc logical=%llu full_stripe=%llu size=%llu map_type=0x%llx mirror=%u replace_nr_stripes=%u replace_stripe_src=%d num_stripes=%u", bioc->logical, bioc->full_stripe_logical, bioc->size, bioc->map_type, bioc->mirror_num, bioc->replace_nr_stripes, bioc->replace_stripe_src, bioc->num_stripes); for (int i = 0; i < bioc->num_stripes; i++) { btrfs_crit(fs_info, " nr=%d devid=%llu physical=%llu", i, bioc->stripes[i].dev->devid, bioc->stripes[i].physical); } } static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info, const struct btrfs_raid_bio *rbio) { if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) return; dump_bioc(fs_info, rbio->bioc); btrfs_crit(fs_info, "rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx", rbio->flags, rbio->nr_sectors, rbio->nr_data, rbio->real_stripes, rbio->stripe_nsectors, rbio->scrubp, rbio->dbitmap); } #define ASSERT_RBIO(expr, rbio) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_STRIPE(expr, rbio, stripe_nr) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "stripe_nr=%d", (stripe_nr)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_SECTOR(expr, rbio, sector_nr) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "sector_nr=%d", (sector_nr)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_LOGICAL(expr, rbio, logical) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "logical=%llu", (logical)); \ } \ ASSERT((expr)); \ }) /* Used by the raid56 code to lock stripes for read/modify/write */ struct btrfs_stripe_hash { struct list_head hash_list; spinlock_t lock; }; /* Used by the raid56 code to lock stripes for read/modify/write */ struct btrfs_stripe_hash_table { struct list_head stripe_cache; spinlock_t cache_lock; int cache_size; struct btrfs_stripe_hash table[]; }; /* * A bvec like structure to present a sector inside a page. * * Unlike bvec we don't need bvlen, as it's fixed to sectorsize. */ struct sector_ptr { struct page *page; unsigned int pgoff:24; unsigned int uptodate:8; }; static void rmw_rbio_work(struct work_struct *work); static void rmw_rbio_work_locked(struct work_struct *work); static void index_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); static int finish_parity_scrub(struct btrfs_raid_bio *rbio); static void scrub_rbio_work_locked(struct work_struct *work); static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) { bitmap_free(rbio->error_bitmap); kfree(rbio->stripe_pages); kfree(rbio->bio_sectors); kfree(rbio->stripe_sectors); kfree(rbio->finish_pointers); } static void free_raid_bio(struct btrfs_raid_bio *rbio) { int i; if (!refcount_dec_and_test(&rbio->refs)) return; WARN_ON(!list_empty(&rbio->stripe_cache)); WARN_ON(!list_empty(&rbio->hash_list)); WARN_ON(!bio_list_empty(&rbio->bio_list)); for (i = 0; i < rbio->nr_pages; i++) { if (rbio->stripe_pages[i]) { __free_page(rbio->stripe_pages[i]); rbio->stripe_pages[i] = NULL; } } btrfs_put_bioc(rbio->bioc); free_raid_bio_pointers(rbio); kfree(rbio); } static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func) { INIT_WORK(&rbio->work, work_func); queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work); } /* * the stripe hash table is used for locking, and to collect * bios in hopes of making a full stripe */ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; struct btrfs_stripe_hash_table *x; struct btrfs_stripe_hash *cur; struct btrfs_stripe_hash *h; int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS; int i; if (info->stripe_hash_table) return 0; /* * The table is large, starting with order 4 and can go as high as * order 7 in case lock debugging is turned on. * * Try harder to allocate and fallback to vmalloc to lower the chance * of a failing mount. */ table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL); if (!table) return -ENOMEM; spin_lock_init(&table->cache_lock); INIT_LIST_HEAD(&table->stripe_cache); h = table->table; for (i = 0; i < num_entries; i++) { cur = h + i; INIT_LIST_HEAD(&cur->hash_list); spin_lock_init(&cur->lock); } x = cmpxchg(&info->stripe_hash_table, NULL, table); kvfree(x); return 0; } /* * caching an rbio means to copy anything from the * bio_sectors array into the stripe_pages array. We * use the page uptodate bit in the stripe cache array * to indicate if it has valid data * * once the caching is done, we set the cache ready * bit. */ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) { int i; int ret; ret = alloc_rbio_pages(rbio); if (ret) return; for (i = 0; i < rbio->nr_sectors; i++) { /* Some range not covered by bio (partial write), skip it */ if (!rbio->bio_sectors[i].page) { /* * Even if the sector is not covered by bio, if it is * a data sector it should still be uptodate as it is * read from disk. */ if (i < rbio->nr_data * rbio->stripe_nsectors) ASSERT(rbio->stripe_sectors[i].uptodate); continue; } ASSERT(rbio->stripe_sectors[i].page); memcpy_page(rbio->stripe_sectors[i].page, rbio->stripe_sectors[i].pgoff, rbio->bio_sectors[i].page, rbio->bio_sectors[i].pgoff, rbio->bioc->fs_info->sectorsize); rbio->stripe_sectors[i].uptodate = 1; } set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); } /* * we hash on the first logical address of the stripe */ static int rbio_bucket(struct btrfs_raid_bio *rbio) { u64 num = rbio->bioc->full_stripe_logical; /* * we shift down quite a bit. We're using byte * addressing, and most of the lower bits are zeros. * This tends to upset hash_64, and it consistently * returns just one or two different values. * * shifting off the lower bits fixes things. */ return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS); } static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio, unsigned int page_nr) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; const u32 sectors_per_page = PAGE_SIZE / sectorsize; int i; ASSERT(page_nr < rbio->nr_pages); for (i = sectors_per_page * page_nr; i < sectors_per_page * page_nr + sectors_per_page; i++) { if (!rbio->stripe_sectors[i].uptodate) return false; } return true; } /* * Update the stripe_sectors[] array to use correct page and pgoff * * Should be called every time any page pointer in stripes_pages[] got modified. */ static void index_stripe_sectors(struct btrfs_raid_bio *rbio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; u32 offset; int i; for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) { int page_index = offset >> PAGE_SHIFT; ASSERT(page_index < rbio->nr_pages); rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index]; rbio->stripe_sectors[i].pgoff = offset_in_page(offset); } } static void steal_rbio_page(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest, int page_nr) { const u32 sectorsize = src->bioc->fs_info->sectorsize; const u32 sectors_per_page = PAGE_SIZE / sectorsize; int i; if (dest->stripe_pages[page_nr]) __free_page(dest->stripe_pages[page_nr]); dest->stripe_pages[page_nr] = src->stripe_pages[page_nr]; src->stripe_pages[page_nr] = NULL; /* Also update the sector->uptodate bits. */ for (i = sectors_per_page * page_nr; i < sectors_per_page * page_nr + sectors_per_page; i++) dest->stripe_sectors[i].uptodate = true; } static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr) { const int sector_nr = (page_nr << PAGE_SHIFT) >> rbio->bioc->fs_info->sectorsize_bits; /* * We have ensured PAGE_SIZE is aligned with sectorsize, thus * we won't have a page which is half data half parity. * * Thus if the first sector of the page belongs to data stripes, then * the full page belongs to data stripes. */ return (sector_nr < rbio->nr_data * rbio->stripe_nsectors); } /* * Stealing an rbio means taking all the uptodate pages from the stripe array * in the source rbio and putting them into the destination rbio. * * This will also update the involved stripe_sectors[] which are referring to * the old pages. */ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest) { int i; if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags)) return; for (i = 0; i < dest->nr_pages; i++) { struct page *p = src->stripe_pages[i]; /* * We don't need to steal P/Q pages as they will always be * regenerated for RMW or full write anyway. */ if (!is_data_stripe_page(src, i)) continue; /* * If @src already has RBIO_CACHE_READY_BIT, it should have * all data stripe pages present and uptodate. */ ASSERT(p); ASSERT(full_page_sectors_uptodate(src, i)); steal_rbio_page(src, dest, i); } index_stripe_sectors(dest); index_stripe_sectors(src); } /* * merging means we take the bio_list from the victim and * splice it into the destination. The victim should * be discarded afterwards. * * must be called with dest->rbio_list_lock held */ static void merge_rbio(struct btrfs_raid_bio *dest, struct btrfs_raid_bio *victim) { bio_list_merge_init(&dest->bio_list, &victim->bio_list); dest->bio_list_bytes += victim->bio_list_bytes; /* Also inherit the bitmaps from @victim. */ bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap, dest->stripe_nsectors); } /* * used to prune items that are in the cache. The caller * must hold the hash table lock. */ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) { int bucket = rbio_bucket(rbio); struct btrfs_stripe_hash_table *table; struct btrfs_stripe_hash *h; int freeit = 0; /* * check the bit again under the hash table lock. */ if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; h = table->table + bucket; /* hold the lock for the bucket because we may be * removing it from the hash table */ spin_lock(&h->lock); /* * hold the lock for the bio list because we need * to make sure the bio list is empty */ spin_lock(&rbio->bio_list_lock); if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) { list_del_init(&rbio->stripe_cache); table->cache_size -= 1; freeit = 1; /* if the bio list isn't empty, this rbio is * still involved in an IO. We take it out * of the cache list, and drop the ref that * was held for the list. * * If the bio_list was empty, we also remove * the rbio from the hash_table, and drop * the corresponding ref */ if (bio_list_empty(&rbio->bio_list)) { if (!list_empty(&rbio->hash_list)) { list_del_init(&rbio->hash_list); refcount_dec(&rbio->refs); BUG_ON(!list_empty(&rbio->plug_list)); } } } spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); if (freeit) free_raid_bio(rbio); } /* * prune a given rbio from the cache */ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; spin_lock(&table->cache_lock); __remove_rbio_from_cache(rbio); spin_unlock(&table->cache_lock); } /* * remove everything in the cache */ static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; struct btrfs_raid_bio *rbio; table = info->stripe_hash_table; spin_lock(&table->cache_lock); while (!list_empty(&table->stripe_cache)) { rbio = list_entry(table->stripe_cache.next, struct btrfs_raid_bio, stripe_cache); __remove_rbio_from_cache(rbio); } spin_unlock(&table->cache_lock); } /* * remove all cached entries and free the hash table * used by unmount */ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) { if (!info->stripe_hash_table) return; btrfs_clear_rbio_cache(info); kvfree(info->stripe_hash_table); info->stripe_hash_table = NULL; } /* * insert an rbio into the stripe cache. It * must have already been prepared by calling * cache_rbio_pages * * If this rbio was already cached, it gets * moved to the front of the lru. * * If the size of the rbio cache is too big, we * prune an item. */ static void cache_rbio(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; spin_lock(&table->cache_lock); spin_lock(&rbio->bio_list_lock); /* bump our ref if we were not in the list before */ if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags)) refcount_inc(&rbio->refs); if (!list_empty(&rbio->stripe_cache)){ list_move(&rbio->stripe_cache, &table->stripe_cache); } else { list_add(&rbio->stripe_cache, &table->stripe_cache); table->cache_size += 1; } spin_unlock(&rbio->bio_list_lock); if (table->cache_size > RBIO_CACHE_SIZE) { struct btrfs_raid_bio *found; found = list_entry(table->stripe_cache.prev, struct btrfs_raid_bio, stripe_cache); if (found != rbio) __remove_rbio_from_cache(found); } spin_unlock(&table->cache_lock); } /* * helper function to run the xor_blocks api. It is only * able to do MAX_XOR_BLOCKS at a time, so we need to * loop through. */ static void run_xor(void **pages, int src_cnt, ssize_t len) { int src_off = 0; int xor_src_cnt = 0; void *dest = pages[src_cnt]; while(src_cnt > 0) { xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); xor_blocks(xor_src_cnt, len, dest, pages + src_off); src_cnt -= xor_src_cnt; src_off += xor_src_cnt; } } /* * Returns true if the bio list inside this rbio covers an entire stripe (no * rmw required). */ static int rbio_is_full(struct btrfs_raid_bio *rbio) { unsigned long size = rbio->bio_list_bytes; int ret = 1; spin_lock(&rbio->bio_list_lock); if (size != rbio->nr_data * BTRFS_STRIPE_LEN) ret = 0; BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN); spin_unlock(&rbio->bio_list_lock); return ret; } /* * returns 1 if it is safe to merge two rbios together. * The merging is safe if the two rbios correspond to * the same stripe and if they are both going in the same * direction (read vs write), and if neither one is * locked for final IO * * The caller is responsible for locking such that * rmw_locked is safe to test */ static int rbio_can_merge(struct btrfs_raid_bio *last, struct btrfs_raid_bio *cur) { if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) || test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) return 0; /* * we can't merge with cached rbios, since the * idea is that when we merge the destination * rbio is going to run our IO for us. We can * steal from cached rbios though, other functions * handle that. */ if (test_bit(RBIO_CACHE_BIT, &last->flags) || test_bit(RBIO_CACHE_BIT, &cur->flags)) return 0; if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical) return 0; /* we can't merge with different operations */ if (last->operation != cur->operation) return 0; /* * We've need read the full stripe from the drive. * check and repair the parity and write the new results. * * We're not allowed to add any new bios to the * bio list here, anyone else that wants to * change this stripe needs to do their own rmw. */ if (last->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; if (last->operation == BTRFS_RBIO_READ_REBUILD) return 0; return 1; } static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio, unsigned int stripe_nr, unsigned int sector_nr) { ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr); return stripe_nr * rbio->stripe_nsectors + sector_nr; } /* Return a sector from rbio->stripe_sectors, not from the bio list */ static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio, unsigned int stripe_nr, unsigned int sector_nr) { return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr, sector_nr)]; } /* Grab a sector inside P stripe */ static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio, unsigned int sector_nr) { return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr); } /* Grab a sector inside Q stripe, return NULL if not RAID6 */ static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio, unsigned int sector_nr) { if (rbio->nr_data + 1 == rbio->real_stripes) return NULL; return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr); } /* * The first stripe in the table for a logical address * has the lock. rbios are added in one of three ways: * * 1) Nobody has the stripe locked yet. The rbio is given * the lock and 0 is returned. The caller must start the IO * themselves. * * 2) Someone has the stripe locked, but we're able to merge * with the lock owner. The rbio is freed and the IO will * start automatically along with the existing rbio. 1 is returned. * * 3) Someone has the stripe locked, but we're not able to merge. * The rbio is added to the lock owner's plug list, or merged into * an rbio already on the plug list. When the lock owner unlocks, * the next rbio on the list is run and the IO is started automatically. * 1 is returned * * If we return 0, the caller still owns the rbio and must continue with * IO submission. If we return 1, the caller must assume the rbio has * already been freed. */ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash *h; struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio); spin_lock(&h->lock); list_for_each_entry(cur, &h->hash_list, hash_list) { if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical) continue; spin_lock(&cur->bio_list_lock); /* Can we steal this cached rbio's pages? */ if (bio_list_empty(&cur->bio_list) && list_empty(&cur->plug_list) && test_bit(RBIO_CACHE_BIT, &cur->flags) && !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) { list_del_init(&cur->hash_list); refcount_dec(&cur->refs); steal_rbio(cur, rbio); cache_drop = cur; spin_unlock(&cur->bio_list_lock); goto lockit; } /* Can we merge into the lock owner? */ if (rbio_can_merge(cur, rbio)) { merge_rbio(cur, rbio); spin_unlock(&cur->bio_list_lock); freeit = rbio; ret = 1; goto out; } /* * We couldn't merge with the running rbio, see if we can merge * with the pending ones. We don't have to check for rmw_locked * because there is no way they are inside finish_rmw right now */ list_for_each_entry(pending, &cur->plug_list, plug_list) { if (rbio_can_merge(pending, rbio)) { merge_rbio(pending, rbio); spin_unlock(&cur->bio_list_lock); freeit = rbio; ret = 1; goto out; } } /* * No merging, put us on the tail of the plug list, our rbio * will be started with the currently running rbio unlocks */ list_add_tail(&rbio->plug_list, &cur->plug_list); spin_unlock(&cur->bio_list_lock); ret = 1; goto out; } lockit: refcount_inc(&rbio->refs); list_add(&rbio->hash_list, &h->hash_list); out: spin_unlock(&h->lock); if (cache_drop) remove_rbio_from_cache(cache_drop); if (freeit) free_raid_bio(freeit); return ret; } static void recover_rbio_work_locked(struct work_struct *work); /* * called as rmw or parity rebuild is completed. If the plug list has more * rbios waiting for this stripe, the next one on the list will be started */ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) { int bucket; struct btrfs_stripe_hash *h; int keep_cache = 0; bucket = rbio_bucket(rbio); h = rbio->bioc->fs_info->stripe_hash_table->table + bucket; if (list_empty(&rbio->plug_list)) cache_rbio(rbio); spin_lock(&h->lock); spin_lock(&rbio->bio_list_lock); if (!list_empty(&rbio->hash_list)) { /* * if we're still cached and there is no other IO * to perform, just leave this rbio here for others * to steal from later */ if (list_empty(&rbio->plug_list) && test_bit(RBIO_CACHE_BIT, &rbio->flags)) { keep_cache = 1; clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); BUG_ON(!bio_list_empty(&rbio->bio_list)); goto done; } list_del_init(&rbio->hash_list); refcount_dec(&rbio->refs); /* * we use the plug list to hold all the rbios * waiting for the chance to lock this stripe. * hand the lock over to one of them. */ if (!list_empty(&rbio->plug_list)) { struct btrfs_raid_bio *next; struct list_head *head = rbio->plug_list.next; next = list_entry(head, struct btrfs_raid_bio, plug_list); list_del_init(&rbio->plug_list); list_add(&next->hash_list, &h->hash_list); refcount_inc(&next->refs); spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); if (next->operation == BTRFS_RBIO_READ_REBUILD) { start_async_work(next, recover_rbio_work_locked); } else if (next->operation == BTRFS_RBIO_WRITE) { steal_rbio(rbio, next); start_async_work(next, rmw_rbio_work_locked); } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { steal_rbio(rbio, next); start_async_work(next, scrub_rbio_work_locked); } goto done_nolock; } } done: spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); done_nolock: if (!keep_cache) remove_rbio_from_cache(rbio); } static void rbio_endio_bio_list(struct bio *cur, blk_status_t err) { struct bio *next; while (cur) { next = cur->bi_next; cur->bi_next = NULL; cur->bi_status = err; bio_endio(cur); cur = next; } } /* * this frees the rbio and runs through all the bios in the * bio_list and calls end_io on them */ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *extra; kfree(rbio->csum_buf); bitmap_free(rbio->csum_bitmap); rbio->csum_buf = NULL; rbio->csum_bitmap = NULL; /* * Clear the data bitmap, as the rbio may be cached for later usage. * do this before before unlock_stripe() so there will be no new bio * for this bio. */ bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors); /* * At this moment, rbio->bio_list is empty, however since rbio does not * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the * hash list, rbio may be merged with others so that rbio->bio_list * becomes non-empty. * Once unlock_stripe() is done, rbio->bio_list will not be updated any * more and we can call bio_endio() on all queued bios. */ unlock_stripe(rbio); extra = bio_list_get(&rbio->bio_list); free_raid_bio(rbio); rbio_endio_bio_list(cur, err); if (extra) rbio_endio_bio_list(extra, err); } /* * Get a sector pointer specified by its @stripe_nr and @sector_nr. * * @rbio: The raid bio * @stripe_nr: Stripe number, valid range [0, real_stripe) * @sector_nr: Sector number inside the stripe, * valid range [0, stripe_nsectors) * @bio_list_only: Whether to use sectors inside the bio list only. * * The read/modify/write code wants to reuse the original bio page as much * as possible, and only use stripe_sectors as fallback. */ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio, int stripe_nr, int sector_nr, bool bio_list_only) { struct sector_ptr *sector; int index; ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, rbio, sector_nr); index = stripe_nr * rbio->stripe_nsectors + sector_nr; ASSERT(index >= 0 && index < rbio->nr_sectors); spin_lock(&rbio->bio_list_lock); sector = &rbio->bio_sectors[index]; if (sector->page || bio_list_only) { /* Don't return sector without a valid page pointer */ if (!sector->page) sector = NULL; spin_unlock(&rbio->bio_list_lock); return sector; } spin_unlock(&rbio->bio_list_lock); return &rbio->stripe_sectors[index]; } /* * allocation and initial setup for the btrfs_raid_bio. Not * this does not allocate any pages for rbio->pages. */ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, struct btrfs_io_context *bioc) { const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes; const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT; const unsigned int num_pages = stripe_npages * real_stripes; const unsigned int stripe_nsectors = BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits; const unsigned int num_sectors = stripe_nsectors * real_stripes; struct btrfs_raid_bio *rbio; /* PAGE_SIZE must also be aligned to sectorsize for subpage support */ ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize)); /* * Our current stripe len should be fixed to 64k thus stripe_nsectors * (at most 16) should be no larger than BITS_PER_LONG. */ ASSERT(stripe_nsectors <= BITS_PER_LONG); /* * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 256 * (limited by u8). */ ASSERT(real_stripes >= 2); ASSERT(real_stripes <= U8_MAX); rbio = kzalloc(sizeof(*rbio), GFP_NOFS); if (!rbio) return ERR_PTR(-ENOMEM); rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), GFP_NOFS); rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), GFP_NOFS); rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS); rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS); if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors || !rbio->finish_pointers || !rbio->error_bitmap) { free_raid_bio_pointers(rbio); kfree(rbio); return ERR_PTR(-ENOMEM); } bio_list_init(&rbio->bio_list); init_waitqueue_head(&rbio->io_wait); INIT_LIST_HEAD(&rbio->plug_list); spin_lock_init(&rbio->bio_list_lock); INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->hash_list); btrfs_get_bioc(bioc); rbio->bioc = bioc; rbio->nr_pages = num_pages; rbio->nr_sectors = num_sectors; rbio->real_stripes = real_stripes; rbio->stripe_npages = stripe_npages; rbio->stripe_nsectors = stripe_nsectors; refcount_set(&rbio->refs, 1); atomic_set(&rbio->stripes_pending, 0); ASSERT(btrfs_nr_parity_stripes(bioc->map_type)); rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type); ASSERT(rbio->nr_data > 0); return rbio; } /* allocate pages for all the stripes in the bio, including parity */ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio) { int ret; ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false); if (ret < 0) return ret; /* Mapping all sectors */ index_stripe_sectors(rbio); return 0; } /* only allocate pages for p/q stripes */ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio) { const int data_pages = rbio->nr_data * rbio->stripe_npages; int ret; ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages, rbio->stripe_pages + data_pages, false); if (ret < 0) return ret; index_stripe_sectors(rbio); return 0; } /* * Return the total number of errors found in the vertical stripe of @sector_nr. * * @faila and @failb will also be updated to the first and second stripe * number of the errors. */ static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr, int *faila, int *failb) { int stripe_nr; int found_errors = 0; if (faila || failb) { /* * Both @faila and @failb should be valid pointers if any of * them is specified. */ ASSERT(faila && failb); *faila = -1; *failb = -1; } for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { int total_sector_nr = stripe_nr * rbio->stripe_nsectors + sector_nr; if (test_bit(total_sector_nr, rbio->error_bitmap)) { found_errors++; if (faila) { /* Update faila and failb. */ if (*faila < 0) *faila = stripe_nr; else if (*failb < 0) *failb = stripe_nr; } } } return found_errors; } /* * Add a single sector @sector into our list of bios for IO. * * Return 0 if everything went well. * Return <0 for error. */ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, struct bio_list *bio_list, struct sector_ptr *sector, unsigned int stripe_nr, unsigned int sector_nr, enum req_op op) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct bio *last = bio_list->tail; int ret; struct bio *bio; struct btrfs_io_stripe *stripe; u64 disk_start; /* * Note: here stripe_nr has taken device replace into consideration, * thus it can be larger than rbio->real_stripe. * So here we check against bioc->num_stripes, not rbio->real_stripes. */ ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, rbio, sector_nr); ASSERT(sector->page); stripe = &rbio->bioc->stripes[stripe_nr]; disk_start = stripe->physical + sector_nr * sectorsize; /* if the device is missing, just fail this stripe */ if (!stripe->dev->bdev) { int found_errors; set_bit(stripe_nr * rbio->stripe_nsectors + sector_nr, rbio->error_bitmap); /* Check if we have reached tolerance early. */ found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); if (found_errors > rbio->bioc->max_errors) return -EIO; return 0; } /* see if we can add this page onto our existing bio */ if (last) { u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT; last_end += last->bi_iter.bi_size; /* * we can't merge these if they are from different * devices or if they are not contiguous */ if (last_end == disk_start && !last->bi_status && last->bi_bdev == stripe->dev->bdev) { ret = bio_add_page(last, sector->page, sectorsize, sector->pgoff); if (ret == sectorsize) return 0; } } /* put a new bio on the list */ bio = bio_alloc(stripe->dev->bdev, max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1), op, GFP_NOFS); bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT; bio->bi_private = rbio; __bio_add_page(bio, sector->page, sectorsize, sector->pgoff); bio_list_add(bio_list, bio); return 0; } static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct bio_vec bvec; struct bvec_iter iter; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - rbio->bioc->full_stripe_logical; bio_for_each_segment(bvec, bio, iter) { u32 bvec_offset; for (bvec_offset = 0; bvec_offset < bvec.bv_len; bvec_offset += sectorsize, offset += sectorsize) { int index = offset / sectorsize; struct sector_ptr *sector = &rbio->bio_sectors[index]; sector->page = bvec.bv_page; sector->pgoff = bvec.bv_offset + bvec_offset; ASSERT(sector->pgoff < PAGE_SIZE); } } } /* * helper function to walk our bio list and populate the bio_pages array with * the result. This seems expensive, but it is faster than constantly * searching through the bio list as we setup the IO in finish_rmw or stripe * reconstruction. * * This must be called before you trust the answers from page_in_rbio */ static void index_rbio_pages(struct btrfs_raid_bio *rbio) { struct bio *bio; spin_lock(&rbio->bio_list_lock); bio_list_for_each(bio, &rbio->bio_list) index_one_bio(rbio, bio); spin_unlock(&rbio->bio_list_lock); } static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio, struct raid56_bio_trace_info *trace_info) { const struct btrfs_io_context *bioc = rbio->bioc; int i; ASSERT(bioc); /* We rely on bio->bi_bdev to find the stripe number. */ if (!bio->bi_bdev) goto not_found; for (i = 0; i < bioc->num_stripes; i++) { if (bio->bi_bdev != bioc->stripes[i].dev->bdev) continue; trace_info->stripe_nr = i; trace_info->devid = bioc->stripes[i].dev->devid; trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - bioc->stripes[i].physical; return; } not_found: trace_info->devid = -1; trace_info->offset = -1; trace_info->stripe_nr = -1; } static inline void bio_list_put(struct bio_list *bio_list) { struct bio *bio; while ((bio = bio_list_pop(bio_list))) bio_put(bio); } static void assert_rbio(struct btrfs_raid_bio *rbio) { if (!IS_ENABLED(CONFIG_BTRFS_DEBUG) || !IS_ENABLED(CONFIG_BTRFS_ASSERT)) return; /* * At least two stripes (2 disks RAID5), and since real_stripes is U8, * we won't go beyond 256 disks anyway. */ ASSERT_RBIO(rbio->real_stripes >= 2, rbio); ASSERT_RBIO(rbio->nr_data > 0, rbio); /* * This is another check to make sure nr data stripes is smaller * than total stripes. */ ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio); } /* Generate PQ for one vertical stripe. */ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr) { void **pointers = rbio->finish_pointers; const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct sector_ptr *sector; int stripe; const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6; /* First collect one sector from each data stripe */ for (stripe = 0; stripe < rbio->nr_data; stripe++) { sector = sector_in_rbio(rbio, stripe, sectornr, 0); pointers[stripe] = kmap_local_page(sector->page) + sector->pgoff; } /* Then add the parity stripe */ sector = rbio_pstripe_sector(rbio, sectornr); sector->uptodate = 1; pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff; if (has_qstripe) { /* * RAID6, add the qstripe and call the library function * to fill in our p/q */ sector = rbio_qstripe_sector(rbio, sectornr); sector->uptodate = 1; pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff; assert_rbio(rbio); raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, pointers); } else { /* raid5 */ memcpy(pointers[rbio->nr_data], pointers[0], sectorsize); run_xor(pointers + 1, rbio->nr_data - 1, sectorsize); } for (stripe = stripe - 1; stripe >= 0; stripe--) kunmap_local(pointers[stripe]); } static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { /* The total sector number inside the full stripe. */ int total_sector_nr; int sectornr; int stripe; int ret; ASSERT(bio_list_size(bio_list) == 0); /* We should have at least one data sector. */ ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors)); /* * Reset errors, as we may have errors inherited from from degraded * write. */ bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); /* * Start assembly. Make bios for everything from the higher layers (the * bio_list in our rbio) and our P/Q. Ignore everything else. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; stripe = total_sector_nr / rbio->stripe_nsectors; sectornr = total_sector_nr % rbio->stripe_nsectors; /* This vertical stripe has no data, skip it. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (stripe < rbio->nr_data) { sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (!sector) continue; } else { sector = rbio_stripe_sector(rbio, stripe, sectornr); } ret = rbio_add_io_sector(rbio, bio_list, sector, stripe, sectornr, REQ_OP_WRITE); if (ret) goto error; } if (likely(!rbio->bioc->replace_nr_stripes)) return 0; /* * Make a copy for the replace target device. * * Thus the source stripe number (in replace_stripe_src) should be valid. */ ASSERT(rbio->bioc->replace_stripe_src >= 0); for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; stripe = total_sector_nr / rbio->stripe_nsectors; sectornr = total_sector_nr % rbio->stripe_nsectors; /* * For RAID56, there is only one device that can be replaced, * and replace_stripe_src[0] indicates the stripe number we * need to copy from. */ if (stripe != rbio->bioc->replace_stripe_src) { /* * We can skip the whole stripe completely, note * total_sector_nr will be increased by one anyway. */ ASSERT(sectornr == 0); total_sector_nr += rbio->stripe_nsectors - 1; continue; } /* This vertical stripe has no data, skip it. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (stripe < rbio->nr_data) { sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (!sector) continue; } else { sector = rbio_stripe_sector(rbio, stripe, sectornr); } ret = rbio_add_io_sector(rbio, bio_list, sector, rbio->real_stripes, sectornr, REQ_OP_WRITE); if (ret) goto error; } return 0; error: bio_list_put(bio_list); return -EIO; } static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - rbio->bioc->full_stripe_logical; int total_nr_sector = offset >> fs_info->sectorsize_bits; ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors); bitmap_set(rbio->error_bitmap, total_nr_sector, bio->bi_iter.bi_size >> fs_info->sectorsize_bits); /* * Special handling for raid56_alloc_missing_rbio() used by * scrub/replace. Unlike call path in raid56_parity_recover(), they * pass an empty bio here. Thus we have to find out the missing device * and mark the stripe error instead. */ if (bio->bi_iter.bi_size == 0) { bool found_missing = false; int stripe_nr; for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { if (!rbio->bioc->stripes[stripe_nr].dev->bdev) { found_missing = true; bitmap_set(rbio->error_bitmap, stripe_nr * rbio->stripe_nsectors, rbio->stripe_nsectors); } } ASSERT(found_missing); } } /* * For subpage case, we can no longer set page Up-to-date directly for * stripe_pages[], thus we need to locate the sector. */ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio, struct page *page, unsigned int pgoff) { int i; for (i = 0; i < rbio->nr_sectors; i++) { struct sector_ptr *sector = &rbio->stripe_sectors[i]; if (sector->page == page && sector->pgoff == pgoff) return sector; } return NULL; } /* * this sets each page in the bio uptodate. It should only be used on private * rbio pages, nothing that comes in from the higher layers */ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct bio_vec *bvec; struct bvec_iter_all iter_all; ASSERT(!bio_flagged(bio, BIO_CLONED)); bio_for_each_segment_all(bvec, bio, iter_all) { struct sector_ptr *sector; int pgoff; for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len; pgoff += sectorsize) { sector = find_stripe_sector(rbio, bvec->bv_page, pgoff); ASSERT(sector); if (sector) sector->uptodate = 1; } } } static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio) { struct bio_vec *bv = bio_first_bvec_all(bio); int i; for (i = 0; i < rbio->nr_sectors; i++) { struct sector_ptr *sector; sector = &rbio->stripe_sectors[i]; if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset) break; sector = &rbio->bio_sectors[i]; if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset) break; } ASSERT(i < rbio->nr_sectors); return i; } static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bio) { int total_sector_nr = get_bio_sector_nr(rbio, bio); u32 bio_size = 0; struct bio_vec *bvec; int i; bio_for_each_bvec_all(bvec, bio, i) bio_size += bvec->bv_len; /* * Since we can have multiple bios touching the error_bitmap, we cannot * call bitmap_set() without protection. * * Instead use set_bit() for each bit, as set_bit() itself is atomic. */ for (i = total_sector_nr; i < total_sector_nr + (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++) set_bit(i, rbio->error_bitmap); } /* Verify the data sectors at read time. */ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio, struct bio *bio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; int total_sector_nr = get_bio_sector_nr(rbio, bio); struct bio_vec *bvec; struct bvec_iter_all iter_all; /* No data csum for the whole stripe, no need to verify. */ if (!rbio->csum_bitmap || !rbio->csum_buf) return; /* P/Q stripes, they have no data csum to verify against. */ if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors) return; bio_for_each_segment_all(bvec, bio, iter_all) { int bv_offset; for (bv_offset = bvec->bv_offset; bv_offset < bvec->bv_offset + bvec->bv_len; bv_offset += fs_info->sectorsize, total_sector_nr++) { u8 csum_buf[BTRFS_CSUM_SIZE]; u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size; int ret; /* No csum for this sector, skip to the next sector. */ if (!test_bit(total_sector_nr, rbio->csum_bitmap)) continue; ret = btrfs_check_sector_csum(fs_info, bvec->bv_page, bv_offset, csum_buf, expected_csum); if (ret < 0) set_bit(total_sector_nr, rbio->error_bitmap); } } } static void raid_wait_read_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; if (bio->bi_status) { rbio_update_error_bitmap(rbio, bio); } else { set_bio_pages_uptodate(rbio, bio); verify_bio_data_sectors(rbio, bio); } bio_put(bio); if (atomic_dec_and_test(&rbio->stripes_pending)) wake_up(&rbio->io_wait); } static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { struct bio *bio; atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); while ((bio = bio_list_pop(bio_list))) { bio->bi_end_io = raid_wait_read_end_io; if (trace_raid56_read_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; bio_get_trace_info(rbio, bio, &trace_info); trace_raid56_read(rbio, bio, &trace_info); } submit_bio(bio); } wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); } static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio) { const int data_pages = rbio->nr_data * rbio->stripe_npages; int ret; ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false); if (ret < 0) return ret; index_stripe_sectors(rbio); return 0; } /* * We use plugging call backs to collect full stripes. * Any time we get a partial stripe write while plugged * we collect it into a list. When the unplug comes down, * we sort the list by logical block number and merge * everything we can into the same rbios */ struct btrfs_plug_cb { struct blk_plug_cb cb; struct btrfs_fs_info *info; struct list_head rbio_list; }; /* * rbios on the plug list are sorted for easier merging. */ static int plug_cmp(void *priv, const struct list_head *a, const struct list_head *b) { const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, plug_list); const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, plug_list); u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; if (a_sector < b_sector) return -1; if (a_sector > b_sector) return 1; return 0; } static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule) { struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb); struct btrfs_raid_bio *cur; struct btrfs_raid_bio *last = NULL; list_sort(NULL, &plug->rbio_list, plug_cmp); while (!list_empty(&plug->rbio_list)) { cur = list_entry(plug->rbio_list.next, struct btrfs_raid_bio, plug_list); list_del_init(&cur->plug_list); if (rbio_is_full(cur)) { /* We have a full stripe, queue it down. */ start_async_work(cur, rmw_rbio_work); continue; } if (last) { if (rbio_can_merge(last, cur)) { merge_rbio(last, cur); free_raid_bio(cur); continue; } start_async_work(last, rmw_rbio_work); } last = cur; } if (last) start_async_work(last, rmw_rbio_work); kfree(plug); } /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) { const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; const u64 full_stripe_start = rbio->bioc->full_stripe_logical; const u32 orig_len = orig_bio->bi_iter.bi_size; const u32 sectorsize = fs_info->sectorsize; u64 cur_logical; ASSERT_RBIO_LOGICAL(orig_logical >= full_stripe_start && orig_logical + orig_len <= full_stripe_start + rbio->nr_data * BTRFS_STRIPE_LEN, rbio, orig_logical); bio_list_add(&rbio->bio_list, orig_bio); rbio->bio_list_bytes += orig_bio->bi_iter.bi_size; /* Update the dbitmap. */ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len; cur_logical += sectorsize) { int bit = ((u32)(cur_logical - full_stripe_start) >> fs_info->sectorsize_bits) % rbio->stripe_nsectors; set_bit(bit, &rbio->dbitmap); } } /* * our main entry point for writes from the rest of the FS. */ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) { bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); bio_endio(bio); return; } rbio->operation = BTRFS_RBIO_WRITE; rbio_add_bio(rbio, bio); /* * Don't plug on full rbios, just get them out the door * as quickly as we can */ if (!rbio_is_full(rbio)) { cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug)); if (cb) { plug = container_of(cb, struct btrfs_plug_cb, cb); if (!plug->info) { plug->info = fs_info; INIT_LIST_HEAD(&plug->rbio_list); } list_add_tail(&rbio->plug_list, &plug->rbio_list); return; } } /* * Either we don't have any existing plug, or we're doing a full stripe, * queue the rmw work now. */ start_async_work(rbio, rmw_rbio_work); } static int verify_one_sector(struct btrfs_raid_bio *rbio, int stripe_nr, int sector_nr) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct sector_ptr *sector; u8 csum_buf[BTRFS_CSUM_SIZE]; u8 *csum_expected; int ret; if (!rbio->csum_bitmap || !rbio->csum_buf) return 0; /* No way to verify P/Q as they are not covered by data csum. */ if (stripe_nr >= rbio->nr_data) return 0; /* * If we're rebuilding a read, we have to use pages from the * bio list if possible. */ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); } else { sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); } ASSERT(sector->page); csum_expected = rbio->csum_buf + (stripe_nr * rbio->stripe_nsectors + sector_nr) * fs_info->csum_size; ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff, csum_buf, csum_expected); return ret; } /* * Recover a vertical stripe specified by @sector_nr. * @*pointers are the pre-allocated pointers by the caller, so we don't * need to allocate/free the pointers again and again. */ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, void **pointers, void **unmap_array) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct sector_ptr *sector; const u32 sectorsize = fs_info->sectorsize; int found_errors; int faila; int failb; int stripe_nr; int ret = 0; /* * Now we just use bitmap to mark the horizontal stripes in * which we have data when doing parity scrub. */ if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && !test_bit(sector_nr, &rbio->dbitmap)) return 0; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); /* * No errors in the vertical stripe, skip it. Can happen for recovery * which only part of a stripe failed csum check. */ if (!found_errors) return 0; if (found_errors > rbio->bioc->max_errors) return -EIO; /* * Setup our array of pointers with sectors from each stripe * * NOTE: store a duplicate array of pointers to preserve the * pointer order. */ for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { /* * If we're rebuilding a read, we have to use pages from the * bio list if possible. */ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); } else { sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); } ASSERT(sector->page); pointers[stripe_nr] = kmap_local_page(sector->page) + sector->pgoff; unmap_array[stripe_nr] = pointers[stripe_nr]; } /* All raid6 handling here */ if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { /* Single failure, rebuild from parity raid5 style */ if (failb < 0) { if (faila == rbio->nr_data) /* * Just the P stripe has failed, without * a bad data or Q stripe. * We have nothing to do, just skip the * recovery for this stripe. */ goto cleanup; /* * a single failure in raid6 is rebuilt * in the pstripe code below */ goto pstripe; } /* * If the q stripe is failed, do a pstripe reconstruction from * the xors. * If both the q stripe and the P stripe are failed, we're * here due to a crc mismatch and we can't give them the * data they want. */ if (failb == rbio->real_stripes - 1) { if (faila == rbio->real_stripes - 2) /* * Only P and Q are corrupted. * We only care about data stripes recovery, * can skip this vertical stripe. */ goto cleanup; /* * Otherwise we have one bad data stripe and * a good P stripe. raid5! */ goto pstripe; } if (failb == rbio->real_stripes - 2) { raid6_datap_recov(rbio->real_stripes, sectorsize, faila, pointers); } else { raid6_2data_recov(rbio->real_stripes, sectorsize, faila, failb, pointers); } } else { void *p; /* Rebuild from P stripe here (raid5 or raid6). */ ASSERT(failb == -1); pstripe: /* Copy parity block into failed block to start with */ memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize); /* Rearrange the pointer array */ p = pointers[faila]; for (stripe_nr = faila; stripe_nr < rbio->nr_data - 1; stripe_nr++) pointers[stripe_nr] = pointers[stripe_nr + 1]; pointers[rbio->nr_data - 1] = p; /* Xor in the rest */ run_xor(pointers, rbio->nr_data - 1, sectorsize); } /* * No matter if this is a RMW or recovery, we should have all * failed sectors repaired in the vertical stripe, thus they are now * uptodate. * Especially if we determine to cache the rbio, we need to * have at least all data sectors uptodate. * * If possible, also check if the repaired sector matches its data * checksum. */ if (faila >= 0) { ret = verify_one_sector(rbio, faila, sector_nr); if (ret < 0) goto cleanup; sector = rbio_stripe_sector(rbio, faila, sector_nr); sector->uptodate = 1; } if (failb >= 0) { ret = verify_one_sector(rbio, failb, sector_nr); if (ret < 0) goto cleanup; sector = rbio_stripe_sector(rbio, failb, sector_nr); sector->uptodate = 1; } cleanup: for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--) kunmap_local(unmap_array[stripe_nr]); return ret; } static int recover_sectors(struct btrfs_raid_bio *rbio) { void **pointers = NULL; void **unmap_array = NULL; int sectornr; int ret = 0; /* * @pointers array stores the pointer for each sector. * * @unmap_array stores copy of pointers that does not get reordered * during reconstruction so that kunmap_local works. */ pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers || !unmap_array) { ret = -ENOMEM; goto out; } if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock(&rbio->bio_list_lock); } index_rbio_pages(rbio); for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { ret = recover_vertical(rbio, sectornr, pointers, unmap_array); if (ret < 0) break; } out: kfree(pointers); kfree(unmap_array); return ret; } static void recover_rbio(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* * Either we're doing recover for a read failure or degraded write, * caller should have set error bitmap correctly. */ ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors)); /* For recovery, we need to read all sectors including P/Q. */ ret = alloc_rbio_pages(rbio); if (ret < 0) goto out; index_rbio_pages(rbio); /* * Read everything that hasn't failed. However this time we will * not trust any cached sector. * As we may read out some stale data but higher layer is not reading * that stale part. * * So here we always re-read everything in recovery path. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { int stripe = total_sector_nr / rbio->stripe_nsectors; int sectornr = total_sector_nr % rbio->stripe_nsectors; struct sector_ptr *sector; /* * Skip the range which has error. It can be a range which is * marked error (for csum mismatch), or it can be a missing * device. */ if (!rbio->bioc->stripes[stripe].dev->bdev || test_bit(total_sector_nr, rbio->error_bitmap)) { /* * Also set the error bit for missing device, which * may not yet have its error bit set. */ set_bit(total_sector_nr, rbio->error_bitmap); continue; } sector = rbio_stripe_sector(rbio, stripe, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret < 0) { bio_list_put(&bio_list); goto out; } } submit_read_wait_bio_list(rbio, &bio_list); ret = recover_sectors(rbio); out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void recover_rbio_work(struct work_struct *work) { struct btrfs_raid_bio *rbio; rbio = container_of(work, struct btrfs_raid_bio, work); if (!lock_stripe_add(rbio)) recover_rbio(rbio); } static void recover_rbio_work_locked(struct work_struct *work) { recover_rbio(container_of(work, struct btrfs_raid_bio, work)); } static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num) { bool found = false; int sector_nr; /* * This is for RAID6 extra recovery tries, thus mirror number should * be large than 2. * Mirror 1 means read from data stripes. Mirror 2 means rebuild using * RAID5 methods. */ ASSERT(mirror_num > 2); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; int faila; int failb; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); /* This vertical stripe doesn't have errors. */ if (!found_errors) continue; /* * If we found errors, there should be only one error marked * by previous set_rbio_range_error(). */ ASSERT(found_errors == 1); found = true; /* Now select another stripe to mark as error. */ failb = rbio->real_stripes - (mirror_num - 1); if (failb <= faila) failb--; /* Set the extra bit in error bitmap. */ if (failb >= 0) set_bit(failb * rbio->stripe_nsectors + sector_nr, rbio->error_bitmap); } /* We should found at least one vertical stripe with error.*/ ASSERT(found); } /* * the main entry point for reads from the higher layers. This * is really only called when the normal read path had a failure, * so we assume the bio they send down corresponds to a failed part * of the drive. */ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, int mirror_num) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) { bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); bio_endio(bio); return; } rbio->operation = BTRFS_RBIO_READ_REBUILD; rbio_add_bio(rbio, bio); set_rbio_range_error(rbio, bio); /* * Loop retry: * for 'mirror == 2', reconstruct from all other stripes. * for 'mirror_num > 2', select a stripe to fail on every retry. */ if (mirror_num > 2) set_rbio_raid6_extra_error(rbio, mirror_num); start_async_work(rbio, recover_rbio_work); } static void fill_data_csums(struct btrfs_raid_bio *rbio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct btrfs_root *csum_root = btrfs_csum_root(fs_info, rbio->bioc->full_stripe_logical); const u64 start = rbio->bioc->full_stripe_logical; const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << fs_info->sectorsize_bits; int ret; /* The rbio should not have its csum buffer initialized. */ ASSERT(!rbio->csum_buf && !rbio->csum_bitmap); /* * Skip the csum search if: * * - The rbio doesn't belong to data block groups * Then we are doing IO for tree blocks, no need to search csums. * * - The rbio belongs to mixed block groups * This is to avoid deadlock, as we're already holding the full * stripe lock, if we trigger a metadata read, and it needs to do * raid56 recovery, we will deadlock. */ if (!(rbio->bioc->map_type & BTRFS_BLOCK_GROUP_DATA) || rbio->bioc->map_type & BTRFS_BLOCK_GROUP_METADATA) return; rbio->csum_buf = kzalloc(rbio->nr_data * rbio->stripe_nsectors * fs_info->csum_size, GFP_NOFS); rbio->csum_bitmap = bitmap_zalloc(rbio->nr_data * rbio->stripe_nsectors, GFP_NOFS); if (!rbio->csum_buf || !rbio->csum_bitmap) { ret = -ENOMEM; goto error; } ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1, rbio->csum_buf, rbio->csum_bitmap); if (ret < 0) goto error; if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits)) goto no_csum; return; error: /* * We failed to allocate memory or grab the csum, but it's not fatal, * we can still continue. But better to warn users that RMW is no * longer safe for this particular sub-stripe write. */ btrfs_warn_rl(fs_info, "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d", rbio->bioc->full_stripe_logical, ret); no_csum: kfree(rbio->csum_buf); bitmap_free(rbio->csum_bitmap); rbio->csum_buf = NULL; rbio->csum_bitmap = NULL; } static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* * Fill the data csums we need for data verification. We need to fill * the csum_bitmap/csum_buf first, as our endio function will try to * verify the data sectors. */ fill_data_csums(rbio); /* * Build a list of bios to read all sectors (including data and P/Q). * * This behavior is to compensate the later csum verification and recovery. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; int stripe = total_sector_nr / rbio->stripe_nsectors; int sectornr = total_sector_nr % rbio->stripe_nsectors; sector = rbio_stripe_sector(rbio, stripe, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret) { bio_list_put(&bio_list); return ret; } } /* * We may or may not have any corrupted sectors (including missing dev * and csum mismatch), just let recover_sectors() to handle them all. */ submit_read_wait_bio_list(rbio, &bio_list); return recover_sectors(rbio); } static void raid_wait_write_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; blk_status_t err = bio->bi_status; if (err) rbio_update_error_bitmap(rbio, bio); bio_put(bio); if (atomic_dec_and_test(&rbio->stripes_pending)) wake_up(&rbio->io_wait); } static void submit_write_bios(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { struct bio *bio; atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); while ((bio = bio_list_pop(bio_list))) { bio->bi_end_io = raid_wait_write_end_io; if (trace_raid56_write_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; bio_get_trace_info(rbio, bio, &trace_info); trace_raid56_write(rbio, bio, &trace_info); } submit_bio(bio); } } /* * To determine if we need to read any sector from the disk. * Should only be utilized in RMW path, to skip cached rbio. */ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio) { int i; for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) { struct sector_ptr *sector = &rbio->stripe_sectors[i]; /* * We have a sector which doesn't have page nor uptodate, * thus this rbio can not be cached one, as cached one must * have all its data sectors present and uptodate. */ if (!sector->page || !sector->uptodate) return true; } return false; } static void rmw_rbio(struct btrfs_raid_bio *rbio) { struct bio_list bio_list; int sectornr; int ret = 0; /* * Allocate the pages for parity first, as P/Q pages will always be * needed for both full-stripe and sub-stripe writes. */ ret = alloc_rbio_parity_pages(rbio); if (ret < 0) goto out; /* * Either full stripe write, or we have every data sector already * cached, can go to write path immediately. */ if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) { /* * Now we're doing sub-stripe write, also need all data stripes * to do the full RMW. */ ret = alloc_rbio_data_pages(rbio); if (ret < 0) goto out; index_rbio_pages(rbio); ret = rmw_read_wait_recover(rbio); if (ret < 0) goto out; } /* * At this stage we're not allowed to add any new bios to the * bio list any more, anyone else that wants to change this stripe * needs to do their own rmw. */ spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock(&rbio->bio_list_lock); bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); index_rbio_pages(rbio); /* * We don't cache full rbios because we're assuming * the higher layers are unlikely to use this area of * the disk again soon. If they do use it again, * hopefully they will send another full bio. */ if (!rbio_is_full(rbio)) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) generate_pq_vertical(rbio, sectornr); bio_list_init(&bio_list); ret = rmw_assemble_write_bios(rbio, &bio_list); if (ret < 0) goto out; /* We should have at least one bio assembled. */ ASSERT(bio_list_size(&bio_list)); submit_write_bios(rbio, &bio_list); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); /* We may have more errors than our tolerance during the read. */ for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { int found_errors; found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL); if (found_errors > rbio->bioc->max_errors) { ret = -EIO; break; } } out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void rmw_rbio_work(struct work_struct *work) { struct btrfs_raid_bio *rbio; rbio = container_of(work, struct btrfs_raid_bio, work); if (lock_stripe_add(rbio) == 0) rmw_rbio(rbio); } static void rmw_rbio_work_locked(struct work_struct *work) { rmw_rbio(container_of(work, struct btrfs_raid_bio, work)); } /* * The following code is used to scrub/replace the parity stripe * * Caller must have already increased bio_counter for getting @bioc. * * Note: We need make sure all the pages that add into the scrub/replace * raid bio are correct and not be changed during the scrub/replace. That * is those pages just hold metadata or file data with checksum. */ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, struct btrfs_io_context *bioc, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; int i; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) return NULL; bio_list_add(&rbio->bio_list, bio); /* * This is a special bio which is used to hold the completion handler * and make the scrub rbio is similar to the other types */ ASSERT(!bio->bi_iter.bi_size); rbio->operation = BTRFS_RBIO_PARITY_SCRUB; /* * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted * to the end position, so this search can start from the first parity * stripe. */ for (i = rbio->nr_data; i < rbio->real_stripes; i++) { if (bioc->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; } } ASSERT_RBIO_STRIPE(i < rbio->real_stripes, rbio, i); bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors); return rbio; } /* * We just scrub the parity that we have correct data on the same horizontal, * so we needn't allocate all pages for all the stripes. */ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; int total_sector_nr; for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct page *page; int sectornr = total_sector_nr % rbio->stripe_nsectors; int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT; if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (rbio->stripe_pages[index]) continue; page = alloc_page(GFP_NOFS); if (!page) return -ENOMEM; rbio->stripe_pages[index] = page; } index_stripe_sectors(rbio); return 0; } static int finish_parity_scrub(struct btrfs_raid_bio *rbio) { struct btrfs_io_context *bioc = rbio->bioc; const u32 sectorsize = bioc->fs_info->sectorsize; void **pointers = rbio->finish_pointers; unsigned long *pbitmap = &rbio->finish_pbitmap; int nr_data = rbio->nr_data; int stripe; int sectornr; bool has_qstripe; struct sector_ptr p_sector = { 0 }; struct sector_ptr q_sector = { 0 }; struct bio_list bio_list; int is_replace = 0; int ret; bio_list_init(&bio_list); if (rbio->real_stripes - rbio->nr_data == 1) has_qstripe = false; else if (rbio->real_stripes - rbio->nr_data == 2) has_qstripe = true; else BUG(); /* * Replace is running and our P/Q stripe is being replaced, then we * need to duplicate the final write to replace target. */ if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) { is_replace = 1; bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors); } /* * Because the higher layers(scrubber) are unlikely to * use this area of the disk again soon, so don't cache * it. */ clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); p_sector.page = alloc_page(GFP_NOFS); if (!p_sector.page) return -ENOMEM; p_sector.pgoff = 0; p_sector.uptodate = 1; if (has_qstripe) { /* RAID6, allocate and map temp space for the Q stripe */ q_sector.page = alloc_page(GFP_NOFS); if (!q_sector.page) { __free_page(p_sector.page); p_sector.page = NULL; return -ENOMEM; } q_sector.pgoff = 0; q_sector.uptodate = 1; pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page); } bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); /* Map the parity stripe just once */ pointers[nr_data] = kmap_local_page(p_sector.page); for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; void *parity; /* first collect one page from each data stripe */ for (stripe = 0; stripe < nr_data; stripe++) { sector = sector_in_rbio(rbio, stripe, sectornr, 0); pointers[stripe] = kmap_local_page(sector->page) + sector->pgoff; } if (has_qstripe) { assert_rbio(rbio); /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, pointers); } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], sectorsize); run_xor(pointers + 1, nr_data - 1, sectorsize); } /* Check scrubbing parity and repair it */ sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); parity = kmap_local_page(sector->page) + sector->pgoff; if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0) memcpy(parity, pointers[rbio->scrubp], sectorsize); else /* Parity is right, needn't writeback */ bitmap_clear(&rbio->dbitmap, sectornr, 1); kunmap_local(parity); for (stripe = nr_data - 1; stripe >= 0; stripe--) kunmap_local(pointers[stripe]); } kunmap_local(pointers[nr_data]); __free_page(p_sector.page); p_sector.page = NULL; if (q_sector.page) { kunmap_local(pointers[rbio->real_stripes - 1]); __free_page(q_sector.page); q_sector.page = NULL; } /* * time to start writing. Make bios for everything from the * higher layers (the bio_list in our rbio) and our p/q. Ignore * everything else. */ for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp, sectornr, REQ_OP_WRITE); if (ret) goto cleanup; } if (!is_replace) goto submit_write; /* * Replace is running and our parity stripe needs to be duplicated to * the target device. Check we have a valid source stripe number. */ ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio); for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->real_stripes, sectornr, REQ_OP_WRITE); if (ret) goto cleanup; } submit_write: submit_write_bios(rbio, &bio_list); return 0; cleanup: bio_list_put(&bio_list); return ret; } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) { if (stripe >= 0 && stripe < rbio->nr_data) return 1; return 0; } static int recover_scrub_rbio(struct btrfs_raid_bio *rbio) { void **pointers = NULL; void **unmap_array = NULL; int sector_nr; int ret = 0; /* * @pointers array stores the pointer for each sector. * * @unmap_array stores copy of pointers that does not get reordered * during reconstruction so that kunmap_local works. */ pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers || !unmap_array) { ret = -ENOMEM; goto out; } for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int dfail = 0, failp = -1; int faila; int failb; int found_errors; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); if (found_errors > rbio->bioc->max_errors) { ret = -EIO; goto out; } if (found_errors == 0) continue; /* We should have at least one error here. */ ASSERT(faila >= 0 || failb >= 0); if (is_data_stripe(rbio, faila)) dfail++; else if (is_parity_stripe(faila)) failp = faila; if (is_data_stripe(rbio, failb)) dfail++; else if (is_parity_stripe(failb)) failp = failb; /* * Because we can not use a scrubbing parity to repair the * data, so the capability of the repair is declined. (In the * case of RAID5, we can not repair anything.) */ if (dfail > rbio->bioc->max_errors - 1) { ret = -EIO; goto out; } /* * If all data is good, only parity is correctly, just repair * the parity, no need to recover data stripes. */ if (dfail == 0) continue; /* * Here means we got one corrupted data stripe and one * corrupted parity on RAID6, if the corrupted parity is * scrubbing parity, luckily, use the other one to repair the * data, or we can not repair the data stripe. */ if (failp != rbio->scrubp) { ret = -EIO; goto out; } ret = recover_vertical(rbio, sector_nr, pointers, unmap_array); if (ret < 0) goto out; } out: kfree(pointers); kfree(unmap_array); return ret; } static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* Build a list of bios to read all the missing parts. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { int sectornr = total_sector_nr % rbio->stripe_nsectors; int stripe = total_sector_nr / rbio->stripe_nsectors; struct sector_ptr *sector; /* No data in the vertical stripe, no need to read. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; /* * We want to find all the sectors missing from the rbio and * read them from the disk. If sector_in_rbio() finds a sector * in the bio list we don't need to read it off the stripe. */ sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (sector) continue; sector = rbio_stripe_sector(rbio, stripe, sectornr); /* * The bio cache may have handed us an uptodate sector. If so, * use it. */ if (sector->uptodate) continue; ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret) { bio_list_put(&bio_list); return ret; } } submit_read_wait_bio_list(rbio, &bio_list); return 0; } static void scrub_rbio(struct btrfs_raid_bio *rbio) { int sector_nr; int ret; ret = alloc_rbio_essential_pages(rbio); if (ret) goto out; bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); ret = scrub_assemble_read_bios(rbio); if (ret < 0) goto out; /* We may have some failures, recover the failed sectors first. */ ret = recover_scrub_rbio(rbio); if (ret < 0) goto out; /* * We have every sector properly prepared. Can finish the scrub * and writeback the good content. */ ret = finish_parity_scrub(rbio); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); if (found_errors > rbio->bioc->max_errors) { ret = -EIO; break; } } out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void scrub_rbio_work_locked(struct work_struct *work) { scrub_rbio(container_of(work, struct btrfs_raid_bio, work)); } void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) start_async_work(rbio, scrub_rbio_work_locked); } /* * This is for scrub call sites where we already have correct data contents. * This allows us to avoid reading data stripes again. * * Unfortunately here we have to do page copy, other than reusing the pages. * This is due to the fact rbio has its own page management for its cache. */ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio, struct page **data_pages, u64 data_logical) { const u64 offset_in_full_stripe = data_logical - rbio->bioc->full_stripe_logical; const int page_index = offset_in_full_stripe >> PAGE_SHIFT; const u32 sectorsize = rbio->bioc->fs_info->sectorsize; const u32 sectors_per_page = PAGE_SIZE / sectorsize; int ret; /* * If we hit ENOMEM temporarily, but later at * raid56_parity_submit_scrub_rbio() time it succeeded, we just do * the extra read, not a big deal. * * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time, * the bio would got proper error number set. */ ret = alloc_rbio_data_pages(rbio); if (ret < 0) return; /* data_logical must be at stripe boundary and inside the full stripe. */ ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN)); ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT)); for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) { struct page *dst = rbio->stripe_pages[page_nr + page_index]; struct page *src = data_pages[page_nr]; memcpy_page(dst, 0, src, 0, PAGE_SIZE); for (int sector_nr = sectors_per_page * page_index; sector_nr < sectors_per_page * (page_index + 1); sector_nr++) rbio->stripe_sectors[sector_nr].uptodate = true; } }
11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 // SPDX-License-Identifier: GPL-2.0-only /* * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. * * Copyright (C) 2015, Intel Corp. * Author: Mika Westerberg <mika.westerberg@linux.intel.com> * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <linux/acpi.h> #include <linux/device.h> #include <linux/export.h> #include <linux/nls.h> #include "internal.h" static ssize_t acpi_object_path(acpi_handle handle, char *buf) { struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; int result; result = acpi_get_name(handle, ACPI_FULL_PATHNAME, &path); if (result) return result; result = sprintf(buf, "%s\n", (char *)path.pointer); kfree(path.pointer); return result; } struct acpi_data_node_attr { struct attribute attr; ssize_t (*show)(struct acpi_data_node *, char *); ssize_t (*store)(struct acpi_data_node *, const char *, size_t count); }; #define DATA_NODE_ATTR(_name) \ static struct acpi_data_node_attr data_node_##_name = \ __ATTR(_name, 0444, data_node_show_##_name, NULL) static ssize_t data_node_show_path(struct acpi_data_node *dn, char *buf) { return dn->handle ? acpi_object_path(dn->handle, buf) : 0; } DATA_NODE_ATTR(path); static struct attribute *acpi_data_node_default_attrs[] = { &data_node_path.attr, NULL }; ATTRIBUTE_GROUPS(acpi_data_node_default); #define to_data_node(k) container_of(k, struct acpi_data_node, kobj) #define to_attr(a) container_of(a, struct acpi_data_node_attr, attr) static ssize_t acpi_data_node_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct acpi_data_node *dn = to_data_node(kobj); struct acpi_data_node_attr *dn_attr = to_attr(attr); return dn_attr->show ? dn_attr->show(dn, buf) : -ENXIO; } static const struct sysfs_ops acpi_data_node_sysfs_ops = { .show = acpi_data_node_attr_show, }; static void acpi_data_node_release(struct kobject *kobj) { struct acpi_data_node *dn = to_data_node(kobj); complete(&dn->kobj_done); } static const struct kobj_type acpi_data_node_ktype = { .sysfs_ops = &acpi_data_node_sysfs_ops, .default_groups = acpi_data_node_default_groups, .release = acpi_data_node_release, }; static void acpi_expose_nondev_subnodes(struct kobject *kobj, struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry(dn, list, sibling) { int ret; init_completion(&dn->kobj_done); ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype, kobj, "%s", dn->name); if (!ret) acpi_expose_nondev_subnodes(&dn->kobj, &dn->data); else if (dn->handle) acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret); } } static void acpi_hide_nondev_subnodes(struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry_reverse(dn, list, sibling) { acpi_hide_nondev_subnodes(&dn->data); kobject_put(&dn->kobj); } } /** * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Creates hid/cid(s) string needed for modalias and uevent * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: * char *modalias: "acpi:IBM0001:ACPI0001" * Return: 0: no _HID and no _CID * -EINVAL: output error * -ENOMEM: output is truncated */ static int create_pnp_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { int len; int count; struct acpi_hardware_id *id; /* Avoid unnecessarily loading modules for non present devices. */ if (!acpi_device_is_present(acpi_dev)) return 0; /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the * device's list. */ count = 0; list_for_each_entry(id, &acpi_dev->pnp.ids, list) if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) count++; if (!count) return 0; len = snprintf(modalias, size, "acpi:"); if (len >= size) return -ENOMEM; size -= len; list_for_each_entry(id, &acpi_dev->pnp.ids, list) { if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) continue; count = snprintf(&modalias[len], size, "%s:", id->id); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } /** * create_of_modalias - Creates DT compatible string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Expose DT compatible modalias as of:NnameTCcompatible. This function should * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of * ACPI/PNP IDs. */ static int create_of_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; acpi_status status; int len, count; int i, nval; char *c; status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); if (ACPI_FAILURE(status)) return -ENODEV; /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); ACPI_FREE(buf.pointer); if (len >= size) return -ENOMEM; size -= len; of_compatible = acpi_dev->data.of_compatible; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } for (i = 0; i < nval; i++, obj++) { count = snprintf(&modalias[len], size, "C%s", obj->string.pointer); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } int __acpi_device_uevent_modalias(const struct acpi_device *adev, struct kobj_uevent_env *env) { int len; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; if (adev->data.of_compatible) len = create_of_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); else len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); if (len < 0) return len; env->buflen += len; return 0; } /** * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @env: Environment variables of the kobject uevent. * * Create the uevent modalias field for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". */ int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); } EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); static int __acpi_device_modalias(const struct acpi_device *adev, char *buf, int size) { int len, count; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; len = create_pnp_modalias(adev, buf, size - 1); if (len < 0) { return len; } else if (len > 0) { buf[len++] = '\n'; size -= len; } if (!adev->data.of_compatible) return len; count = create_of_modalias(adev, buf + len, size - 1); if (count < 0) { return count; } else if (count > 0) { len += count; buf[len++] = '\n'; } return len; } /** * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @buf: The buffer to save pnp_modalias and of_modalias. * @size: Size of buffer. * * Create the modalias sysfs attribute for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". */ int acpi_device_modalias(struct device *dev, char *buf, int size) { return __acpi_device_modalias(acpi_companion_match(dev), buf, size); } EXPORT_SYMBOL_GPL(acpi_device_modalias); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); } static DEVICE_ATTR_RO(modalias); static ssize_t real_power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); int state; int ret; ret = acpi_device_get_power(adev, &state); if (ret) return ret; return sprintf(buf, "%s\n", acpi_power_state_string(state)); } static DEVICE_ATTR_RO(real_power_state); static ssize_t power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); } static DEVICE_ATTR_RO(power_state); static ssize_t eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct acpi_device *acpi_device = to_acpi_device(d); acpi_object_type not_used; acpi_status status; if (!count || buf[0] != '1') return -EINVAL; if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) && !d->driver) return -ENODEV; status = acpi_get_type(acpi_device->handle, &not_used); if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) return -ENODEV; acpi_dev_get(acpi_device); status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); if (ACPI_SUCCESS(status)) return count; acpi_dev_put(acpi_device); acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; } static DEVICE_ATTR_WO(eject); static ssize_t hid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); } static DEVICE_ATTR_RO(hid); static ssize_t uid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_uid(acpi_dev)); } static DEVICE_ATTR_RO(uid); static ssize_t adr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); if (acpi_dev->pnp.bus_address > U32_MAX) return sprintf(buf, "0x%016llx\n", acpi_dev->pnp.bus_address); else return sprintf(buf, "0x%08llx\n", acpi_dev->pnp.bus_address); } static DEVICE_ATTR_RO(adr); static ssize_t path_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return acpi_object_path(acpi_dev->handle, buf); } static DEVICE_ATTR_RO(path); /* sysfs file that shows description text from the ACPI _STR method */ static ssize_t description_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *str_obj; acpi_status status; int result; status = acpi_evaluate_object_typed(acpi_dev->handle, "_STR", NULL, &buffer, ACPI_TYPE_BUFFER); if (ACPI_FAILURE(status)) return -EIO; str_obj = buffer.pointer; /* * The _STR object contains a Unicode identifier for a device. * We need to convert to utf-8 so it can be displayed. */ result = utf16s_to_utf8s( (wchar_t *)str_obj->buffer.pointer, str_obj->buffer.length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); buf[result++] = '\n'; kfree(str_obj); return result; } static DEVICE_ATTR_RO(description); static ssize_t sun_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sun; status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sun); } static DEVICE_ATTR_RO(sun); static ssize_t hrv_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long hrv; status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", hrv); } static DEVICE_ATTR_RO(hrv); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sta; status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sta); } static DEVICE_ATTR_RO(status); static struct attribute *acpi_attrs[] = { &dev_attr_path.attr, &dev_attr_hid.attr, &dev_attr_modalias.attr, &dev_attr_description.attr, &dev_attr_adr.attr, &dev_attr_uid.attr, &dev_attr_sun.attr, &dev_attr_hrv.attr, &dev_attr_status.attr, &dev_attr_eject.attr, &dev_attr_power_state.attr, &dev_attr_real_power_state.attr, NULL }; static bool acpi_show_attr(struct acpi_device *dev, const struct device_attribute *attr) { /* * Devices gotten from FADT don't have a "path" attribute */ if (attr == &dev_attr_path) return dev->handle; if (attr == &dev_attr_hid || attr == &dev_attr_modalias) return !list_empty(&dev->pnp.ids); if (attr == &dev_attr_description) return acpi_has_method(dev->handle, "_STR"); if (attr == &dev_attr_adr) return dev->pnp.type.bus_address; if (attr == &dev_attr_uid) return acpi_device_uid(dev); if (attr == &dev_attr_sun) return acpi_has_method(dev->handle, "_SUN"); if (attr == &dev_attr_hrv) return acpi_has_method(dev->handle, "_HRV"); if (attr == &dev_attr_status) return acpi_has_method(dev->handle, "_STA"); /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. */ if (attr == &dev_attr_eject) return acpi_has_method(dev->handle, "_EJ0"); if (attr == &dev_attr_power_state) return dev->flags.power_manageable; if (attr == &dev_attr_real_power_state) return dev->flags.power_manageable && dev->power.flags.power_resources; dev_warn_once(&dev->dev, "Unexpected attribute: %s\n", attr->attr.name); return false; } static umode_t acpi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int attrno) { struct acpi_device *dev = to_acpi_device(kobj_to_dev(kobj)); if (acpi_show_attr(dev, container_of(attr, struct device_attribute, attr))) return attr->mode; else return 0; } static const struct attribute_group acpi_group = { .attrs = acpi_attrs, .is_visible = acpi_attr_is_visible, }; const struct attribute_group *acpi_groups[] = { &acpi_group, NULL }; /** * acpi_device_setup_files - Create sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_setup_files(struct acpi_device *dev) { acpi_expose_nondev_subnodes(&dev->dev.kobj, &dev->data); } /** * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_remove_files(struct acpi_device *dev) { acpi_hide_nondev_subnodes(&dev->data); }
94 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 // SPDX-License-Identifier: GPL-2.0+ /* * Fast-charge control for Apple "MFi" devices * * Copyright (C) 2019 Bastien Nocera <hadess@hadess.net> */ /* Standard include files */ #include <linux/module.h> #include <linux/power_supply.h> #include <linux/slab.h> #include <linux/usb.h> MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); MODULE_DESCRIPTION("Fast-charge control for Apple \"MFi\" devices"); MODULE_LICENSE("GPL"); #define TRICKLE_CURRENT_MA 0 #define FAST_CURRENT_MA 2500 #define APPLE_VENDOR_ID 0x05ac /* Apple */ /* The product ID is defined as starting with 0x12nn, as per the * "Choosing an Apple Device USB Configuration" section in * release R9 (2012) of the "MFi Accessory Hardware Specification" * * To distinguish an Apple device, a USB host can check the device * descriptor of attached USB devices for the following fields: * ■ Vendor ID: 0x05AC * ■ Product ID: 0x12nn * * Those checks will be done in .match() and .probe(). */ static const struct usb_device_id mfi_fc_id_table[] = { { .idVendor = APPLE_VENDOR_ID, .match_flags = USB_DEVICE_ID_MATCH_VENDOR }, {}, }; MODULE_DEVICE_TABLE(usb, mfi_fc_id_table); /* Driver-local specific stuff */ struct mfi_device { struct usb_device *udev; struct power_supply *battery; int charge_type; }; static int apple_mfi_fc_set_charge_type(struct mfi_device *mfi, const union power_supply_propval *val) { int current_ma; int retval; __u8 request_type; if (mfi->charge_type == val->intval) { dev_dbg(&mfi->udev->dev, "charge type %d already set\n", mfi->charge_type); return 0; } switch (val->intval) { case POWER_SUPPLY_CHARGE_TYPE_TRICKLE: current_ma = TRICKLE_CURRENT_MA; break; case POWER_SUPPLY_CHARGE_TYPE_FAST: current_ma = FAST_CURRENT_MA; break; default: return -EINVAL; } request_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; retval = usb_control_msg(mfi->udev, usb_sndctrlpipe(mfi->udev, 0), 0x40, /* Vendor‐defined power request */ request_type, current_ma, /* wValue, current offset */ current_ma, /* wIndex, current offset */ NULL, 0, USB_CTRL_GET_TIMEOUT); if (retval) { dev_dbg(&mfi->udev->dev, "retval = %d\n", retval); return retval; } mfi->charge_type = val->intval; return 0; } static int apple_mfi_fc_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct mfi_device *mfi = power_supply_get_drvdata(psy); dev_dbg(&mfi->udev->dev, "prop: %d\n", psp); switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: val->intval = mfi->charge_type; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; default: return -ENODATA; } return 0; } static int apple_mfi_fc_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { struct mfi_device *mfi = power_supply_get_drvdata(psy); int ret; dev_dbg(&mfi->udev->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(&mfi->udev->dev); if (ret < 0) { pm_runtime_put_noidle(&mfi->udev->dev); return ret; } switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: ret = apple_mfi_fc_set_charge_type(mfi, val); break; default: ret = -EINVAL; } pm_runtime_mark_last_busy(&mfi->udev->dev); pm_runtime_put_autosuspend(&mfi->udev->dev); return ret; } static int apple_mfi_fc_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: return 1; default: return 0; } } static enum power_supply_property apple_mfi_fc_properties[] = { POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_SCOPE }; static const struct power_supply_desc apple_mfi_fc_desc = { .name = "apple_mfi_fastcharge", .type = POWER_SUPPLY_TYPE_BATTERY, .properties = apple_mfi_fc_properties, .num_properties = ARRAY_SIZE(apple_mfi_fc_properties), .get_property = apple_mfi_fc_get_property, .set_property = apple_mfi_fc_set_property, .property_is_writeable = apple_mfi_fc_property_is_writeable }; static bool mfi_fc_match(struct usb_device *udev) { int idProduct; idProduct = le16_to_cpu(udev->descriptor.idProduct); /* See comment above mfi_fc_id_table[] */ return (idProduct >= 0x1200 && idProduct <= 0x12ff); } static int mfi_fc_probe(struct usb_device *udev) { struct power_supply_config battery_cfg = {}; struct mfi_device *mfi = NULL; int err; if (!mfi_fc_match(udev)) return -ENODEV; mfi = kzalloc(sizeof(struct mfi_device), GFP_KERNEL); if (!mfi) return -ENOMEM; battery_cfg.drv_data = mfi; mfi->charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; mfi->battery = power_supply_register(&udev->dev, &apple_mfi_fc_desc, &battery_cfg); if (IS_ERR(mfi->battery)) { dev_err(&udev->dev, "Can't register battery\n"); err = PTR_ERR(mfi->battery); kfree(mfi); return err; } mfi->udev = usb_get_dev(udev); dev_set_drvdata(&udev->dev, mfi); return 0; } static void mfi_fc_disconnect(struct usb_device *udev) { struct mfi_device *mfi; mfi = dev_get_drvdata(&udev->dev); if (mfi->battery) power_supply_unregister(mfi->battery); dev_set_drvdata(&udev->dev, NULL); usb_put_dev(mfi->udev); kfree(mfi); } static struct usb_device_driver mfi_fc_driver = { .name = "apple-mfi-fastcharge", .probe = mfi_fc_probe, .disconnect = mfi_fc_disconnect, .id_table = mfi_fc_id_table, .match = mfi_fc_match, .generic_subclass = 1, }; static int __init mfi_fc_driver_init(void) { return usb_register_device_driver(&mfi_fc_driver, THIS_MODULE); } static void __exit mfi_fc_driver_exit(void) { usb_deregister_device_driver(&mfi_fc_driver); } module_init(mfi_fc_driver_init); module_exit(mfi_fc_driver_exit);
17 8 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* * IPv6 specific functions of netfilter core * * Rusty Russell (C) 2000 -- This code is GPL. * Patrick McHardy (C) 2006-2012 */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/export.h> #include <net/addrconf.h> #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/xfrm.h> #include <net/netfilter/nf_queue.h> #include <net/netfilter/nf_conntrack_bridge.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include "../bridge/br_private.h" int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_l3mdev = l3mdev_master_ifindex(dev), .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), }; int err; if (sk && sk->sk_bound_dev_if) fl6.flowi6_oif = sk->sk_bound_dev_if; else if (strict) fl6.flowi6_oif = dev->ifindex; fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, dst); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) return -ENOMEM; return 0; } EXPORT_SYMBOL(ip6_route_me_harder); static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ .sk.sk_bound_dev_if = 1, .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, }; const void *sk = strict ? &fake_sk : NULL; struct dst_entry *result; int err; result = ip6_route_output(net, sk, &fl->u.ip6); err = result->error; if (err) dst_release(result); else *dst = result; return err; } EXPORT_SYMBOL_GPL(__nf_ip6_route); int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; unsigned int mtu, hlen; int hroom, err = 0; __be32 frag_id; err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) goto blackhole; hlen = err; nexthdr = *prevhdr; mtu = skb->dev->mtu; if (frag_max_size > mtu || frag_max_size < IPV6_MIN_MTU) goto blackhole; mtu = frag_max_size; if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto blackhole; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) goto blackhole; hroom = LL_RESERVED_SPACE(skb->dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); struct ip6_fraglist_iter iter; struct sk_buff *frag2; if (first_len - hlen > mtu || skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto blackhole; if (skb_cloned(skb)) goto slow_path; skb_walk_frags(skb, frag2) { if (frag2->len > mtu || skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) goto blackhole; /* Partially cloned skb? */ if (skb_shared(frag2)) goto slow_path; } err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter); if (err < 0) goto blackhole; for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) ip6_fraglist_prepare(skb, &iter); skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; skb = ip6_fraglist_next(&iter); } kfree(iter.tmp_hdr); if (!err) return 0; kfree_skb_list(iter.frag); return err; } slow_path: /* This is a linearized skbuff, the original geometry is lost for us. * This may also be a clone skbuff, we could preserve the geometry for * the copies but probably not worth the effort. */ ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom, LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id, &state); while (state.left > 0) { struct sk_buff *skb2; skb2 = ip6_frag_next(skb, &state); if (IS_ERR(skb2)) { err = PTR_ERR(skb2); goto blackhole; } skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; } consume_skb(skb); return err; blackhole: kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_ip6_fragment); static const struct nf_ipv6_ops ipv6ops = { #if IS_MODULE(CONFIG_IPV6) .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, .route = __nf_ip6_route, #if IS_ENABLED(CONFIG_SYN_COOKIES) .cookie_init_sequence = __cookie_v6_init_sequence, .cookie_v6_check = __cookie_v6_check, #endif #endif .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) .br_fragment = br_ip6_fragment, #endif }; int __init ipv6_netfilter_init(void) { RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); return 0; } /* This can be called from inet6_init() on errors, so it cannot * be marked __exit. -DaveM */ void ipv6_netfilter_fini(void) { RCU_INIT_POINTER(nf_ipv6_ops, NULL); }
7 7 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 // SPDX-License-Identifier: GPL-2.0-only /* * This is a module which is used for logging packets. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <net/ipv6.h> #include <net/icmp.h> #include <net/udp.h> #include <net/tcp.h> #include <net/route.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_LOG.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/nf_log.h> static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; struct net *net = xt_net(par); struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), xt_out(par), &li, "%s", loginfo->prefix); return XT_CONTINUE; } static int log_tg_check(const struct xt_tgchk_param *par) { const struct xt_log_info *loginfo = par->targinfo; int ret; if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6) return -EINVAL; if (loginfo->level >= 8) { pr_debug("level %u >= 8\n", loginfo->level); return -EINVAL; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { pr_debug("prefix is not null-terminated\n"); return -EINVAL; } ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); if (ret != 0 && !par->nft_compat) { request_module("%s", "nf_log_syslog"); ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); } return ret; } static void log_tg_destroy(const struct xt_tgdtor_param *par) { nf_logger_put(par->family, NF_LOG_TYPE_LOG); } static struct xt_target log_tg_regs[] __read_mostly = { { .name = "LOG", .family = NFPROTO_IPV4, .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, .destroy = log_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "LOG", .family = NFPROTO_IPV6, .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, .destroy = log_tg_destroy, .me = THIS_MODULE, }, #endif }; static int __init log_tg_init(void) { return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } static void __exit log_tg_exit(void) { xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } module_init(log_tg_init); module_exit(log_tg_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging"); MODULE_ALIAS("ipt_LOG"); MODULE_ALIAS("ip6t_LOG"); MODULE_SOFTDEP("pre: nf_log_syslog");
4 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: GPL-2.0-only /* * Pegasus Mobile Notetaker Pen input tablet driver * * Copyright (c) 2016 Martin Kepplinger <martink@posteo.de> */ /* * request packet (control endpoint): * |-------------------------------------| * | Report ID | Nr of bytes | command | * | (1 byte) | (1 byte) | (n bytes) | * |-------------------------------------| * | 0x02 | n | | * |-------------------------------------| * * data packet after set xy mode command, 0x80 0xb5 0x02 0x01 * and pen is in range: * * byte byte name value (bits) * -------------------------------------------- * 0 status 0 1 0 0 0 0 X X * 1 color 0 0 0 0 H 0 S T * 2 X low * 3 X high * 4 Y low * 5 Y high * * X X battery state: * no state reported 0x00 * battery low 0x01 * battery good 0x02 * * H Hovering * S Switch 1 (pen button) * T Tip */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/input.h> #include <linux/usb/input.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/mutex.h> /* USB HID defines */ #define USB_REQ_GET_REPORT 0x01 #define USB_REQ_SET_REPORT 0x09 #define USB_VENDOR_ID_PEGASUSTECH 0x0e20 #define USB_DEVICE_ID_PEGASUS_NOTETAKER_EN100 0x0101 /* device specific defines */ #define NOTETAKER_REPORT_ID 0x02 #define NOTETAKER_SET_CMD 0x80 #define NOTETAKER_SET_MODE 0xb5 #define NOTETAKER_LED_MOUSE 0x02 #define PEN_MODE_XY 0x01 #define SPECIAL_COMMAND 0x80 #define BUTTON_PRESSED 0xb5 #define COMMAND_VERSION 0xa9 /* in xy data packet */ #define BATTERY_NO_REPORT 0x40 #define BATTERY_LOW 0x41 #define BATTERY_GOOD 0x42 #define PEN_BUTTON_PRESSED BIT(1) #define PEN_TIP BIT(0) struct pegasus { unsigned char *data; u8 data_len; dma_addr_t data_dma; struct input_dev *dev; struct usb_device *usbdev; struct usb_interface *intf; struct urb *irq; /* serialize access to open/suspend */ struct mutex pm_mutex; bool is_open; char name[128]; char phys[64]; struct work_struct init; }; static int pegasus_control_msg(struct pegasus *pegasus, u8 *data, int len) { const int sizeof_buf = len + 2; int result; int error; u8 *cmd_buf; cmd_buf = kmalloc(sizeof_buf, GFP_KERNEL); if (!cmd_buf) return -ENOMEM; cmd_buf[0] = NOTETAKER_REPORT_ID; cmd_buf[1] = len; memcpy(cmd_buf + 2, data, len); result = usb_control_msg(pegasus->usbdev, usb_sndctrlpipe(pegasus->usbdev, 0), USB_REQ_SET_REPORT, USB_TYPE_VENDOR | USB_DIR_OUT, 0, 0, cmd_buf, sizeof_buf, USB_CTRL_SET_TIMEOUT); kfree(cmd_buf); if (unlikely(result != sizeof_buf)) { error = result < 0 ? result : -EIO; dev_err(&pegasus->usbdev->dev, "control msg error: %d\n", error); return error; } return 0; } static int pegasus_set_mode(struct pegasus *pegasus, u8 mode, u8 led) { u8 cmd[] = { NOTETAKER_SET_CMD, NOTETAKER_SET_MODE, led, mode }; return pegasus_control_msg(pegasus, cmd, sizeof(cmd)); } static void pegasus_parse_packet(struct pegasus *pegasus) { unsigned char *data = pegasus->data; struct input_dev *dev = pegasus->dev; u16 x, y; switch (data[0]) { case SPECIAL_COMMAND: /* device button pressed */ if (data[1] == BUTTON_PRESSED) schedule_work(&pegasus->init); break; /* xy data */ case BATTERY_LOW: dev_warn_once(&dev->dev, "Pen battery low\n"); fallthrough; case BATTERY_NO_REPORT: case BATTERY_GOOD: x = le16_to_cpup((__le16 *)&data[2]); y = le16_to_cpup((__le16 *)&data[4]); /* pen-up event */ if (x == 0 && y == 0) break; input_report_key(dev, BTN_TOUCH, data[1] & PEN_TIP); input_report_key(dev, BTN_RIGHT, data[1] & PEN_BUTTON_PRESSED); input_report_key(dev, BTN_TOOL_PEN, 1); input_report_abs(dev, ABS_X, (s16)x); input_report_abs(dev, ABS_Y, y); input_sync(dev); break; default: dev_warn_once(&pegasus->usbdev->dev, "unknown answer from device\n"); } } static void pegasus_irq(struct urb *urb) { struct pegasus *pegasus = urb->context; struct usb_device *dev = pegasus->usbdev; int retval; switch (urb->status) { case 0: pegasus_parse_packet(pegasus); usb_mark_last_busy(pegasus->usbdev); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_err(&dev->dev, "%s - urb shutting down with status: %d", __func__, urb->status); return; default: dev_err(&dev->dev, "%s - nonzero urb status received: %d", __func__, urb->status); break; } retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->dev, "%s - usb_submit_urb failed with result %d", __func__, retval); } static void pegasus_init(struct work_struct *work) { struct pegasus *pegasus = container_of(work, struct pegasus, init); int error; error = pegasus_set_mode(pegasus, PEN_MODE_XY, NOTETAKER_LED_MOUSE); if (error) dev_err(&pegasus->usbdev->dev, "pegasus_set_mode error: %d\n", error); } static int pegasus_open(struct input_dev *dev) { struct pegasus *pegasus = input_get_drvdata(dev); int error; error = usb_autopm_get_interface(pegasus->intf); if (error) return error; mutex_lock(&pegasus->pm_mutex); pegasus->irq->dev = pegasus->usbdev; if (usb_submit_urb(pegasus->irq, GFP_KERNEL)) { error = -EIO; goto err_autopm_put; } error = pegasus_set_mode(pegasus, PEN_MODE_XY, NOTETAKER_LED_MOUSE); if (error) goto err_kill_urb; pegasus->is_open = true; mutex_unlock(&pegasus->pm_mutex); return 0; err_kill_urb: usb_kill_urb(pegasus->irq); cancel_work_sync(&pegasus->init); err_autopm_put: mutex_unlock(&pegasus->pm_mutex); usb_autopm_put_interface(pegasus->intf); return error; } static void pegasus_close(struct input_dev *dev) { struct pegasus *pegasus = input_get_drvdata(dev); mutex_lock(&pegasus->pm_mutex); usb_kill_urb(pegasus->irq); cancel_work_sync(&pegasus->init); pegasus->is_open = false; mutex_unlock(&pegasus->pm_mutex); usb_autopm_put_interface(pegasus->intf); } static int pegasus_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *endpoint; struct pegasus *pegasus; struct input_dev *input_dev; int error; int pipe; /* We control interface 0 */ if (intf->cur_altsetting->desc.bInterfaceNumber >= 1) return -ENODEV; /* Sanity check that the device has an endpoint */ if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "Invalid number of endpoints\n"); return -EINVAL; } endpoint = &intf->cur_altsetting->endpoint[0].desc; pegasus = kzalloc(sizeof(*pegasus), GFP_KERNEL); input_dev = input_allocate_device(); if (!pegasus || !input_dev) { error = -ENOMEM; goto err_free_mem; } mutex_init(&pegasus->pm_mutex); pegasus->usbdev = dev; pegasus->dev = input_dev; pegasus->intf = intf; pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); /* Sanity check that pipe's type matches endpoint's type */ if (usb_pipe_type_check(dev, pipe)) { error = -EINVAL; goto err_free_mem; } pegasus->data_len = usb_maxpacket(dev, pipe); pegasus->data = usb_alloc_coherent(dev, pegasus->data_len, GFP_KERNEL, &pegasus->data_dma); if (!pegasus->data) { error = -ENOMEM; goto err_free_mem; } pegasus->irq = usb_alloc_urb(0, GFP_KERNEL); if (!pegasus->irq) { error = -ENOMEM; goto err_free_dma; } usb_fill_int_urb(pegasus->irq, dev, pipe, pegasus->data, pegasus->data_len, pegasus_irq, pegasus, endpoint->bInterval); pegasus->irq->transfer_dma = pegasus->data_dma; pegasus->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (dev->manufacturer) strscpy(pegasus->name, dev->manufacturer, sizeof(pegasus->name)); if (dev->product) { if (dev->manufacturer) strlcat(pegasus->name, " ", sizeof(pegasus->name)); strlcat(pegasus->name, dev->product, sizeof(pegasus->name)); } if (!strlen(pegasus->name)) snprintf(pegasus->name, sizeof(pegasus->name), "USB Pegasus Device %04x:%04x", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); usb_make_path(dev, pegasus->phys, sizeof(pegasus->phys)); strlcat(pegasus->phys, "/input0", sizeof(pegasus->phys)); INIT_WORK(&pegasus->init, pegasus_init); usb_set_intfdata(intf, pegasus); input_dev->name = pegasus->name; input_dev->phys = pegasus->phys; usb_to_input_id(dev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, pegasus); input_dev->open = pegasus_open; input_dev->close = pegasus_close; __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __set_bit(ABS_X, input_dev->absbit); __set_bit(ABS_Y, input_dev->absbit); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(BTN_RIGHT, input_dev->keybit); __set_bit(BTN_TOOL_PEN, input_dev->keybit); __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); __set_bit(INPUT_PROP_POINTER, input_dev->propbit); input_set_abs_params(input_dev, ABS_X, -1500, 1500, 8, 0); input_set_abs_params(input_dev, ABS_Y, 1600, 3000, 8, 0); error = input_register_device(pegasus->dev); if (error) goto err_free_urb; return 0; err_free_urb: usb_free_urb(pegasus->irq); err_free_dma: usb_free_coherent(dev, pegasus->data_len, pegasus->data, pegasus->data_dma); err_free_mem: input_free_device(input_dev); kfree(pegasus); usb_set_intfdata(intf, NULL); return error; } static void pegasus_disconnect(struct usb_interface *intf) { struct pegasus *pegasus = usb_get_intfdata(intf); input_unregister_device(pegasus->dev); usb_free_urb(pegasus->irq); usb_free_coherent(interface_to_usbdev(intf), pegasus->data_len, pegasus->data, pegasus->data_dma); kfree(pegasus); usb_set_intfdata(intf, NULL); } static int pegasus_suspend(struct usb_interface *intf, pm_message_t message) { struct pegasus *pegasus = usb_get_intfdata(intf); mutex_lock(&pegasus->pm_mutex); usb_kill_urb(pegasus->irq); cancel_work_sync(&pegasus->init); mutex_unlock(&pegasus->pm_mutex); return 0; } static int pegasus_resume(struct usb_interface *intf) { struct pegasus *pegasus = usb_get_intfdata(intf); int retval = 0; mutex_lock(&pegasus->pm_mutex); if (pegasus->is_open && usb_submit_urb(pegasus->irq, GFP_NOIO) < 0) retval = -EIO; mutex_unlock(&pegasus->pm_mutex); return retval; } static int pegasus_reset_resume(struct usb_interface *intf) { struct pegasus *pegasus = usb_get_intfdata(intf); int retval = 0; mutex_lock(&pegasus->pm_mutex); if (pegasus->is_open) { retval = pegasus_set_mode(pegasus, PEN_MODE_XY, NOTETAKER_LED_MOUSE); if (!retval && usb_submit_urb(pegasus->irq, GFP_NOIO) < 0) retval = -EIO; } mutex_unlock(&pegasus->pm_mutex); return retval; } static const struct usb_device_id pegasus_ids[] = { { USB_DEVICE(USB_VENDOR_ID_PEGASUSTECH, USB_DEVICE_ID_PEGASUS_NOTETAKER_EN100) }, { } }; MODULE_DEVICE_TABLE(usb, pegasus_ids); static struct usb_driver pegasus_driver = { .name = "pegasus_notetaker", .probe = pegasus_probe, .disconnect = pegasus_disconnect, .suspend = pegasus_suspend, .resume = pegasus_resume, .reset_resume = pegasus_reset_resume, .id_table = pegasus_ids, .supports_autosuspend = 1, }; module_usb_driver(pegasus_driver); MODULE_AUTHOR("Martin Kepplinger <martink@posteo.de>"); MODULE_DESCRIPTION("Pegasus Mobile Notetaker Pen tablet driver"); MODULE_LICENSE("GPL");
7 1 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * xt_u32 - kernel module to match u32 packet content * * Original author: Don Cohen <don@isis.cs3-inc.com> * (C) CC Computer Consultants GmbH, 2007 */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/types.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_u32.h> static bool u32_match_it(const struct xt_u32 *data, const struct sk_buff *skb) { const struct xt_u32_test *ct; unsigned int testind; unsigned int nnums; unsigned int nvals; unsigned int i; __be32 n; u_int32_t pos; u_int32_t val; u_int32_t at; /* * Small example: "0 >> 28 == 4 && 8 & 0xFF0000 >> 16 = 6, 17" * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands. */ for (testind = 0; testind < data->ntests; ++testind) { ct = &data->tests[testind]; at = 0; pos = ct->location[0].number; if (skb->len < 4 || pos > skb->len - 4) return false; if (skb_copy_bits(skb, pos, &n, sizeof(n)) < 0) BUG(); val = ntohl(n); nnums = ct->nnums; /* Inner loop runs over "&", "<<", ">>" and "@" operands */ for (i = 1; i < nnums; ++i) { u_int32_t number = ct->location[i].number; switch (ct->location[i].nextop) { case XT_U32_AND: val &= number; break; case XT_U32_LEFTSH: val <<= number; break; case XT_U32_RIGHTSH: val >>= number; break; case XT_U32_AT: if (at + val < at) return false; at += val; pos = number; if (at + 4 < at || skb->len < at + 4 || pos > skb->len - at - 4) return false; if (skb_copy_bits(skb, at + pos, &n, sizeof(n)) < 0) BUG(); val = ntohl(n); break; } } /* Run over the "," and ":" operands */ nvals = ct->nvalues; for (i = 0; i < nvals; ++i) if (ct->value[i].min <= val && val <= ct->value[i].max) break; if (i >= ct->nvalues) return false; } return true; } static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_u32 *data = par->matchinfo; bool ret; ret = u32_match_it(data, skb); return ret ^ data->invert; } static int u32_mt_checkentry(const struct xt_mtchk_param *par) { const struct xt_u32 *data = par->matchinfo; const struct xt_u32_test *ct; unsigned int i; if (data->ntests > ARRAY_SIZE(data->tests)) return -EINVAL; for (i = 0; i < data->ntests; ++i) { ct = &data->tests[i]; if (ct->nnums > ARRAY_SIZE(ct->location) || ct->nvalues > ARRAY_SIZE(ct->value)) return -EINVAL; } return 0; } static struct xt_match xt_u32_mt_reg __read_mostly = { .name = "u32", .revision = 0, .family = NFPROTO_UNSPEC, .match = u32_mt, .checkentry = u32_mt_checkentry, .matchsize = sizeof(struct xt_u32), .me = THIS_MODULE, }; static int __init u32_mt_init(void) { return xt_register_match(&xt_u32_mt_reg); } static void __exit u32_mt_exit(void) { xt_unregister_match(&xt_u32_mt_reg); } module_init(u32_mt_init); module_exit(u32_mt_exit); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: arbitrary byte matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); MODULE_ALIAS("ip6t_u32");
13 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 // SPDX-License-Identifier: GPL-2.0-or-later /* Instantiate a public key crypto key from an X.509 Certificate * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "X.509: "fmt #include <crypto/hash.h> #include <keys/asymmetric-parser.h> #include <keys/asymmetric-subtype.h> #include <keys/system_keyring.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string.h> #include "asymmetric_keys.h" #include "x509_parser.h" /* * Set up the signature parameters in an X.509 certificate. This involves * digesting the signed data and extracting the signature. */ int x509_get_sig_params(struct x509_certificate *cert) { struct public_key_signature *sig = cert->sig; struct crypto_shash *tfm; struct shash_desc *desc; size_t desc_size; int ret; pr_devel("==>%s()\n", __func__); sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL); if (!sig->s) return -ENOMEM; sig->s_size = cert->raw_sig_size; /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ tfm = crypto_alloc_shash(sig->hash_algo, 0, 0); if (IS_ERR(tfm)) { if (PTR_ERR(tfm) == -ENOENT) { cert->unsupported_sig = true; return 0; } return PTR_ERR(tfm); } desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); sig->digest_size = crypto_shash_digestsize(tfm); ret = -ENOMEM; sig->digest = kmalloc(sig->digest_size, GFP_KERNEL); if (!sig->digest) goto error; desc = kzalloc(desc_size, GFP_KERNEL); if (!desc) goto error; desc->tfm = tfm; ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, sig->digest); if (ret < 0) goto error_2; ret = is_hash_blacklisted(sig->digest, sig->digest_size, BLACKLIST_HASH_X509_TBS); if (ret == -EKEYREJECTED) { pr_err("Cert %*phN is blacklisted\n", sig->digest_size, sig->digest); cert->blacklisted = true; ret = 0; } error_2: kfree(desc); error: crypto_free_shash(tfm); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } /* * Check for self-signedness in an X.509 cert and if found, check the signature * immediately if we can. */ int x509_check_for_self_signed(struct x509_certificate *cert) { int ret = 0; pr_devel("==>%s()\n", __func__); if (cert->raw_subject_size != cert->raw_issuer_size || memcmp(cert->raw_subject, cert->raw_issuer, cert->raw_issuer_size) != 0) goto not_self_signed; if (cert->sig->auth_ids[0] || cert->sig->auth_ids[1]) { /* If the AKID is present it may have one or two parts. If * both are supplied, both must match. */ bool a = asymmetric_key_id_same(cert->skid, cert->sig->auth_ids[1]); bool b = asymmetric_key_id_same(cert->id, cert->sig->auth_ids[0]); if (!a && !b) goto not_self_signed; ret = -EKEYREJECTED; if (((a && !b) || (b && !a)) && cert->sig->auth_ids[0] && cert->sig->auth_ids[1]) goto out; } if (cert->unsupported_sig) { ret = 0; goto out; } ret = public_key_verify_signature(cert->pub, cert->sig); if (ret < 0) { if (ret == -ENOPKG) { cert->unsupported_sig = true; ret = 0; } goto out; } pr_devel("Cert Self-signature verified"); cert->self_signed = true; out: pr_devel("<==%s() = %d\n", __func__, ret); return ret; not_self_signed: pr_devel("<==%s() = 0 [not]\n", __func__); return 0; } /* * Attempt to parse a data blob for a key as an X509 certificate. */ static int x509_key_preparse(struct key_preparsed_payload *prep) { struct x509_certificate *cert __free(x509_free_certificate); struct asymmetric_key_ids *kids __free(kfree) = NULL; char *p, *desc __free(kfree) = NULL; const char *q; size_t srlen, sulen; cert = x509_cert_parse(prep->data, prep->datalen); if (IS_ERR(cert)) return PTR_ERR(cert); pr_devel("Cert Issuer: %s\n", cert->issuer); pr_devel("Cert Subject: %s\n", cert->subject); pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo); pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to); cert->pub->id_type = "X509"; if (cert->unsupported_sig) { public_key_signature_free(cert->sig); cert->sig = NULL; } else { pr_devel("Cert Signature: %s + %s\n", cert->sig->pkey_algo, cert->sig->hash_algo); } /* Don't permit addition of blacklisted keys */ if (cert->blacklisted) return -EKEYREJECTED; /* Propose a description */ sulen = strlen(cert->subject); if (cert->raw_skid) { srlen = cert->raw_skid_size; q = cert->raw_skid; } else { srlen = cert->raw_serial_size; q = cert->raw_serial; } desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); if (!desc) return -ENOMEM; p = memcpy(desc, cert->subject, sulen); p += sulen; *p++ = ':'; *p++ = ' '; p = bin2hex(p, q, srlen); *p = 0; kids = kmalloc(sizeof(struct asymmetric_key_ids), GFP_KERNEL); if (!kids) return -ENOMEM; kids->id[0] = cert->id; kids->id[1] = cert->skid; kids->id[2] = asymmetric_key_generate_id(cert->raw_subject, cert->raw_subject_size, "", 0); if (IS_ERR(kids->id[2])) return PTR_ERR(kids->id[2]); /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); prep->payload.data[asym_subtype] = &public_key_subtype; prep->payload.data[asym_key_ids] = kids; prep->payload.data[asym_crypto] = cert->pub; prep->payload.data[asym_auth] = cert->sig; prep->description = desc; prep->quotalen = 100; /* We've finished with the certificate */ cert->pub = NULL; cert->id = NULL; cert->skid = NULL; cert->sig = NULL; desc = NULL; kids = NULL; return 0; } static struct asymmetric_key_parser x509_key_parser = { .owner = THIS_MODULE, .name = "x509", .parse = x509_key_preparse, }; /* * Module stuff */ static int __init x509_key_init(void) { return register_asymmetric_key_parser(&x509_key_parser); } static void __exit x509_key_exit(void) { unregister_asymmetric_key_parser(&x509_key_parser); } module_init(x509_key_init); module_exit(x509_key_exit); MODULE_DESCRIPTION("X.509 certificate parser"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL");
25 29 25 18 4 24 1 25 12 52 18 39 53 18 13 53 54 53 52 39 39 39 39 25 25 24 25 1 24 25 25 25 25 25 24 2 23 27 17 24 27 27 27 14 15 15 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA. * Copyright 2016 Intel Corporation * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * **************************************************************************/ /* * Generic simple memory manager implementation. Intended to be used as a base * class implementation for more advanced memory managers. * * Note that the algorithm used is quite simple and there might be substantial * performance gains if a smarter free list is implemented. Currently it is * just an unordered stack of free regions. This could easily be improved if * an RB-tree is used instead. At least if we expect heavy fragmentation. * * Aligned allocations can also see improvement. * * Authors: * Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ #include <linux/export.h> #include <linux/interval_tree_generic.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stacktrace.h> #include <drm/drm_mm.h> /** * DOC: Overview * * drm_mm provides a simple range allocator. The drivers are free to use the * resource allocator from the linux core if it suits them, the upside of drm_mm * is that it's in the DRM core. Which means that it's easier to extend for * some of the crazier special purpose needs of gpus. * * The main data struct is &drm_mm, allocations are tracked in &drm_mm_node. * Drivers are free to embed either of them into their own suitable * datastructures. drm_mm itself will not do any memory allocations of its own, * so if drivers choose not to embed nodes they need to still allocate them * themselves. * * The range allocator also supports reservation of preallocated blocks. This is * useful for taking over initial mode setting configurations from the firmware, * where an object needs to be created which exactly matches the firmware's * scanout target. As long as the range is still free it can be inserted anytime * after the allocator is initialized, which helps with avoiding looped * dependencies in the driver load sequence. * * drm_mm maintains a stack of most recently freed holes, which of all * simplistic datastructures seems to be a fairly decent approach to clustering * allocations and avoiding too much fragmentation. This means free space * searches are O(num_holes). Given that all the fancy features drm_mm supports * something better would be fairly complex and since gfx thrashing is a fairly * steep cliff not a real concern. Removing a node again is O(1). * * drm_mm supports a few features: Alignment and range restrictions can be * supplied. Furthermore every &drm_mm_node has a color value (which is just an * opaque unsigned long) which in conjunction with a driver callback can be used * to implement sophisticated placement restrictions. The i915 DRM driver uses * this to implement guard pages between incompatible caching domains in the * graphics TT. * * Two behaviors are supported for searching and allocating: bottom-up and * top-down. The default is bottom-up. Top-down allocation can be used if the * memory area has different restrictions, or just to reduce fragmentation. * * Finally iteration helpers to walk all nodes and all holes are provided as are * some basic allocator dumpers for debugging. * * Note that this range allocator is not thread-safe, drivers need to protect * modifications with their own locking. The idea behind this is that for a full * memory manager additional data needs to be protected anyway, hence internal * locking would be fully redundant. */ #ifdef CONFIG_DRM_DEBUG_MM #include <linux/stackdepot.h> #define STACKDEPTH 32 #define BUFSZ 4096 static noinline void save_stack(struct drm_mm_node *node) { unsigned long entries[STACKDEPTH]; unsigned int n; n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); /* May be called under spinlock, so avoid sleeping */ node->stack = stack_depot_save(entries, n, GFP_NOWAIT); } static void show_leaks(struct drm_mm *mm) { struct drm_mm_node *node; char *buf; buf = kmalloc(BUFSZ, GFP_KERNEL); if (!buf) return; list_for_each_entry(node, drm_mm_nodes(mm), node_list) { if (!node->stack) { DRM_ERROR("node [%08llx + %08llx]: unknown owner\n", node->start, node->size); continue; } stack_depot_snprint(node->stack, buf, BUFSZ, 0); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } kfree(buf); } #undef STACKDEPTH #undef BUFSZ #else static void save_stack(struct drm_mm_node *node) { } static void show_leaks(struct drm_mm *mm) { } #endif #define START(node) ((node)->start) #define LAST(node) ((node)->start + (node)->size - 1) INTERVAL_TREE_DEFINE(struct drm_mm_node, rb, u64, __subtree_last, START, LAST, static inline, drm_mm_interval_tree) struct drm_mm_node * __drm_mm_interval_first(const struct drm_mm *mm, u64 start, u64 last) { return drm_mm_interval_tree_iter_first((struct rb_root_cached *)&mm->interval_tree, start, last) ?: (struct drm_mm_node *)&mm->head_node; } EXPORT_SYMBOL(__drm_mm_interval_first); static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, struct drm_mm_node *node) { struct drm_mm *mm = hole_node->mm; struct rb_node **link, *rb; struct drm_mm_node *parent; bool leftmost; node->__subtree_last = LAST(node); if (drm_mm_node_allocated(hole_node)) { rb = &hole_node->rb; while (rb) { parent = rb_entry(rb, struct drm_mm_node, rb); if (parent->__subtree_last >= node->__subtree_last) break; parent->__subtree_last = node->__subtree_last; rb = rb_parent(rb); } rb = &hole_node->rb; link = &hole_node->rb.rb_right; leftmost = false; } else { rb = NULL; link = &mm->interval_tree.rb_root.rb_node; leftmost = true; } while (*link) { rb = *link; parent = rb_entry(rb, struct drm_mm_node, rb); if (parent->__subtree_last < node->__subtree_last) parent->__subtree_last = node->__subtree_last; if (node->start < parent->start) { link = &parent->rb.rb_left; } else { link = &parent->rb.rb_right; leftmost = false; } } rb_link_node(&node->rb, rb, link); rb_insert_augmented_cached(&node->rb, &mm->interval_tree, leftmost, &drm_mm_interval_tree_augment); } #define HOLE_SIZE(NODE) ((NODE)->hole_size) #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE)) static u64 rb_to_hole_size(struct rb_node *rb) { return rb_entry(rb, struct drm_mm_node, rb_hole_size)->hole_size; } static void insert_hole_size(struct rb_root_cached *root, struct drm_mm_node *node) { struct rb_node **link = &root->rb_root.rb_node, *rb = NULL; u64 x = node->hole_size; bool first = true; while (*link) { rb = *link; if (x > rb_to_hole_size(rb)) { link = &rb->rb_left; } else { link = &rb->rb_right; first = false; } } rb_link_node(&node->rb_hole_size, rb, link); rb_insert_color_cached(&node->rb_hole_size, root, first); } RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks, struct drm_mm_node, rb_hole_addr, u64, subtree_max_hole, HOLE_SIZE) static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node) { struct rb_node **link = &root->rb_node, *rb_parent = NULL; u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole; struct drm_mm_node *parent; while (*link) { rb_parent = *link; parent = rb_entry(rb_parent, struct drm_mm_node, rb_hole_addr); if (parent->subtree_max_hole < subtree_max_hole) parent->subtree_max_hole = subtree_max_hole; if (start < HOLE_ADDR(parent)) link = &parent->rb_hole_addr.rb_left; else link = &parent->rb_hole_addr.rb_right; } rb_link_node(&node->rb_hole_addr, rb_parent, link); rb_insert_augmented(&node->rb_hole_addr, root, &augment_callbacks); } static void add_hole(struct drm_mm_node *node) { struct drm_mm *mm = node->mm; node->hole_size = __drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node); node->subtree_max_hole = node->hole_size; DRM_MM_BUG_ON(!drm_mm_hole_follows(node)); insert_hole_size(&mm->holes_size, node); insert_hole_addr(&mm->holes_addr, node); list_add(&node->hole_stack, &mm->hole_stack); } static void rm_hole(struct drm_mm_node *node) { DRM_MM_BUG_ON(!drm_mm_hole_follows(node)); list_del(&node->hole_stack); rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size); rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr, &augment_callbacks); node->hole_size = 0; node->subtree_max_hole = 0; DRM_MM_BUG_ON(drm_mm_hole_follows(node)); } static inline struct drm_mm_node *rb_hole_size_to_node(struct rb_node *rb) { return rb_entry_safe(rb, struct drm_mm_node, rb_hole_size); } static inline struct drm_mm_node *rb_hole_addr_to_node(struct rb_node *rb) { return rb_entry_safe(rb, struct drm_mm_node, rb_hole_addr); } static struct drm_mm_node *best_hole(struct drm_mm *mm, u64 size) { struct rb_node *rb = mm->holes_size.rb_root.rb_node; struct drm_mm_node *best = NULL; do { struct drm_mm_node *node = rb_entry(rb, struct drm_mm_node, rb_hole_size); if (size <= node->hole_size) { best = node; rb = rb->rb_right; } else { rb = rb->rb_left; } } while (rb); return best; } static bool usable_hole_addr(struct rb_node *rb, u64 size) { return rb && rb_hole_addr_to_node(rb)->subtree_max_hole >= size; } static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) { struct rb_node *rb = mm->holes_addr.rb_node; struct drm_mm_node *node = NULL; while (rb) { u64 hole_start; if (!usable_hole_addr(rb, size)) break; node = rb_hole_addr_to_node(rb); hole_start = __drm_mm_hole_node_start(node); if (addr < hole_start) rb = node->rb_hole_addr.rb_left; else if (addr > hole_start + node->hole_size) rb = node->rb_hole_addr.rb_right; else break; } return node; } static struct drm_mm_node * first_hole(struct drm_mm *mm, u64 start, u64 end, u64 size, enum drm_mm_insert_mode mode) { switch (mode) { default: case DRM_MM_INSERT_BEST: return best_hole(mm, size); case DRM_MM_INSERT_LOW: return find_hole_addr(mm, start, size); case DRM_MM_INSERT_HIGH: return find_hole_addr(mm, end, size); case DRM_MM_INSERT_EVICT: return list_first_entry_or_null(&mm->hole_stack, struct drm_mm_node, hole_stack); } } /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions * @name: name of function to declare * @first: first rb member to traverse (either rb_left or rb_right). * @last: last rb member to traverse (either rb_right or rb_left). * * This macro declares a function to return the next hole of the addr rb tree. * While traversing the tree we take the searched size into account and only * visit branches with potential big enough holes. */ #define DECLARE_NEXT_HOLE_ADDR(name, first, last) \ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ { \ struct rb_node *parent, *node = &entry->rb_hole_addr; \ \ if (!entry || RB_EMPTY_NODE(node)) \ return NULL; \ \ if (usable_hole_addr(node->first, size)) { \ node = node->first; \ while (usable_hole_addr(node->last, size)) \ node = node->last; \ return rb_hole_addr_to_node(node); \ } \ \ while ((parent = rb_parent(node)) && node == parent->first) \ node = parent; \ \ return rb_hole_addr_to_node(parent); \ } DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) static struct drm_mm_node * next_hole(struct drm_mm *mm, struct drm_mm_node *node, u64 size, enum drm_mm_insert_mode mode) { switch (mode) { default: case DRM_MM_INSERT_BEST: return rb_hole_size_to_node(rb_prev(&node->rb_hole_size)); case DRM_MM_INSERT_LOW: return next_hole_low_addr(node, size); case DRM_MM_INSERT_HIGH: return next_hole_high_addr(node, size); case DRM_MM_INSERT_EVICT: node = list_next_entry(node, hole_stack); return &node->hole_stack == &mm->hole_stack ? NULL : node; } } /** * drm_mm_reserve_node - insert an pre-initialized node * @mm: drm_mm allocator to insert @node into * @node: drm_mm_node to insert * * This functions inserts an already set-up &drm_mm_node into the allocator, * meaning that start, size and color must be set by the caller. All other * fields must be cleared to 0. This is useful to initialize the allocator with * preallocated objects which must be set-up before the range allocator can be * set-up, e.g. when taking over a firmware framebuffer. * * Returns: * 0 on success, -ENOSPC if there's no hole where @node is. */ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node) { struct drm_mm_node *hole; u64 hole_start, hole_end; u64 adj_start, adj_end; u64 end; end = node->start + node->size; if (unlikely(end <= node->start)) return -ENOSPC; /* Find the relevant hole to add our node to */ hole = find_hole_addr(mm, node->start, 0); if (!hole) return -ENOSPC; adj_start = hole_start = __drm_mm_hole_node_start(hole); adj_end = hole_end = hole_start + hole->hole_size; if (mm->color_adjust) mm->color_adjust(hole, node->color, &adj_start, &adj_end); if (adj_start > node->start || adj_end < end) return -ENOSPC; node->mm = mm; __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags); list_add(&node->node_list, &hole->node_list); drm_mm_interval_tree_add_node(hole, node); node->hole_size = 0; rm_hole(hole); if (node->start > hole_start) add_hole(hole); if (end < hole_end) add_hole(node); save_stack(node); return 0; } EXPORT_SYMBOL(drm_mm_reserve_node); static u64 rb_to_hole_size_or_zero(struct rb_node *rb) { return rb ? rb_to_hole_size(rb) : 0; } /** * drm_mm_insert_node_in_range - ranged search for space and insert @node * @mm: drm_mm to allocate from * @node: preallocate node to insert * @size: size of the allocation * @alignment: alignment of the allocation * @color: opaque tag value to use for this node * @range_start: start of the allowed range for this node * @range_end: end of the allowed range for this node * @mode: fine-tune the allocation search and placement * * The preallocated @node must be cleared to 0. * * Returns: * 0 on success, -ENOSPC if there's no suitable hole. */ int drm_mm_insert_node_in_range(struct drm_mm * const mm, struct drm_mm_node * const node, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, enum drm_mm_insert_mode mode) { struct drm_mm_node *hole; u64 remainder_mask; bool once; DRM_MM_BUG_ON(range_start > range_end); if (unlikely(size == 0 || range_end - range_start < size)) return -ENOSPC; if (rb_to_hole_size_or_zero(rb_first_cached(&mm->holes_size)) < size) return -ENOSPC; if (alignment <= 1) alignment = 0; once = mode & DRM_MM_INSERT_ONCE; mode &= ~DRM_MM_INSERT_ONCE; remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; for (hole = first_hole(mm, range_start, range_end, size, mode); hole; hole = once ? NULL : next_hole(mm, hole, size, mode)) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; u64 col_start, col_end; if (mode == DRM_MM_INSERT_LOW && hole_start >= range_end) break; if (mode == DRM_MM_INSERT_HIGH && hole_end <= range_start) break; col_start = hole_start; col_end = hole_end; if (mm->color_adjust) mm->color_adjust(hole, color, &col_start, &col_end); adj_start = max(col_start, range_start); adj_end = min(col_end, range_end); if (adj_end <= adj_start || adj_end - adj_start < size) continue; if (mode == DRM_MM_INSERT_HIGH) adj_start = adj_end - size; if (alignment) { u64 rem; if (likely(remainder_mask)) rem = adj_start & remainder_mask; else div64_u64_rem(adj_start, alignment, &rem); if (rem) { adj_start -= rem; if (mode != DRM_MM_INSERT_HIGH) adj_start += alignment; if (adj_start < max(col_start, range_start) || min(col_end, range_end) - adj_start < size) continue; if (adj_end <= adj_start || adj_end - adj_start < size) continue; } } node->mm = mm; node->size = size; node->start = adj_start; node->color = color; node->hole_size = 0; __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags); list_add(&node->node_list, &hole->node_list); drm_mm_interval_tree_add_node(hole, node); rm_hole(hole); if (adj_start > hole_start) add_hole(hole); if (adj_start + size < hole_end) add_hole(node); save_stack(node); return 0; } return -ENOSPC; } EXPORT_SYMBOL(drm_mm_insert_node_in_range); static inline bool drm_mm_node_scanned_block(const struct drm_mm_node *node) { return test_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags); } /** * drm_mm_remove_node - Remove a memory node from the allocator. * @node: drm_mm_node to remove * * This just removes a node from its drm_mm allocator. The node does not need to * be cleared again before it can be re-inserted into this or any other drm_mm * allocator. It is a bug to call this function on a unallocated node. */ void drm_mm_remove_node(struct drm_mm_node *node) { struct drm_mm *mm = node->mm; struct drm_mm_node *prev_node; DRM_MM_BUG_ON(!drm_mm_node_allocated(node)); DRM_MM_BUG_ON(drm_mm_node_scanned_block(node)); prev_node = list_prev_entry(node, node_list); if (drm_mm_hole_follows(node)) rm_hole(node); drm_mm_interval_tree_remove(node, &mm->interval_tree); list_del(&node->node_list); if (drm_mm_hole_follows(prev_node)) rm_hole(prev_node); add_hole(prev_node); clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags); } EXPORT_SYMBOL(drm_mm_remove_node); /** * DOC: lru scan roster * * Very often GPUs need to have continuous allocations for a given object. When * evicting objects to make space for a new one it is therefore not most * efficient when we simply start to select all objects from the tail of an LRU * until there's a suitable hole: Especially for big objects or nodes that * otherwise have special allocation constraints there's a good chance we evict * lots of (smaller) objects unnecessarily. * * The DRM range allocator supports this use-case through the scanning * interfaces. First a scan operation needs to be initialized with * drm_mm_scan_init() or drm_mm_scan_init_with_range(). The driver adds * objects to the roster, probably by walking an LRU list, but this can be * freely implemented. Eviction candidates are added using * drm_mm_scan_add_block() until a suitable hole is found or there are no * further evictable objects. Eviction roster metadata is tracked in &struct * drm_mm_scan. * * The driver must walk through all objects again in exactly the reverse * order to restore the allocator state. Note that while the allocator is used * in the scan mode no other operation is allowed. * * Finally the driver evicts all objects selected (drm_mm_scan_remove_block() * reported true) in the scan, and any overlapping nodes after color adjustment * (drm_mm_scan_color_evict()). Adding and removing an object is O(1), and * since freeing a node is also O(1) the overall complexity is * O(scanned_objects). So like the free stack which needs to be walked before a * scan operation even begins this is linear in the number of objects. It * doesn't seem to hurt too badly. */ /** * drm_mm_scan_init_with_range - initialize range-restricted lru scanning * @scan: scan state * @mm: drm_mm to scan * @size: size of the allocation * @alignment: alignment of the allocation * @color: opaque tag value to use for the allocation * @start: start of the allowed range for the allocation * @end: end of the allowed range for the allocation * @mode: fine-tune the allocation search and placement * * This simply sets up the scanning routines with the parameters for the desired * hole. * * Warning: * As long as the scan list is non-empty, no other operations than * adding/removing nodes to/from the scan list are allowed. */ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan, struct drm_mm *mm, u64 size, u64 alignment, unsigned long color, u64 start, u64 end, enum drm_mm_insert_mode mode) { DRM_MM_BUG_ON(start >= end); DRM_MM_BUG_ON(!size || size > end - start); DRM_MM_BUG_ON(mm->scan_active); scan->mm = mm; if (alignment <= 1) alignment = 0; scan->color = color; scan->alignment = alignment; scan->remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; scan->size = size; scan->mode = mode; DRM_MM_BUG_ON(end <= start); scan->range_start = start; scan->range_end = end; scan->hit_start = U64_MAX; scan->hit_end = 0; } EXPORT_SYMBOL(drm_mm_scan_init_with_range); /** * drm_mm_scan_add_block - add a node to the scan list * @scan: the active drm_mm scanner * @node: drm_mm_node to add * * Add a node to the scan list that might be freed to make space for the desired * hole. * * Returns: * True if a hole has been found, false otherwise. */ bool drm_mm_scan_add_block(struct drm_mm_scan *scan, struct drm_mm_node *node) { struct drm_mm *mm = scan->mm; struct drm_mm_node *hole; u64 hole_start, hole_end; u64 col_start, col_end; u64 adj_start, adj_end; DRM_MM_BUG_ON(node->mm != mm); DRM_MM_BUG_ON(!drm_mm_node_allocated(node)); DRM_MM_BUG_ON(drm_mm_node_scanned_block(node)); __set_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags); mm->scan_active++; /* Remove this block from the node_list so that we enlarge the hole * (distance between the end of our previous node and the start of * or next), without poisoning the link so that we can restore it * later in drm_mm_scan_remove_block(). */ hole = list_prev_entry(node, node_list); DRM_MM_BUG_ON(list_next_entry(hole, node_list) != node); __list_del_entry(&node->node_list); hole_start = __drm_mm_hole_node_start(hole); hole_end = __drm_mm_hole_node_end(hole); col_start = hole_start; col_end = hole_end; if (mm->color_adjust) mm->color_adjust(hole, scan->color, &col_start, &col_end); adj_start = max(col_start, scan->range_start); adj_end = min(col_end, scan->range_end); if (adj_end <= adj_start || adj_end - adj_start < scan->size) return false; if (scan->mode == DRM_MM_INSERT_HIGH) adj_start = adj_end - scan->size; if (scan->alignment) { u64 rem; if (likely(scan->remainder_mask)) rem = adj_start & scan->remainder_mask; else div64_u64_rem(adj_start, scan->alignment, &rem); if (rem) { adj_start -= rem; if (scan->mode != DRM_MM_INSERT_HIGH) adj_start += scan->alignment; if (adj_start < max(col_start, scan->range_start) || min(col_end, scan->range_end) - adj_start < scan->size) return false; if (adj_end <= adj_start || adj_end - adj_start < scan->size) return false; } } scan->hit_start = adj_start; scan->hit_end = adj_start + scan->size; DRM_MM_BUG_ON(scan->hit_start >= scan->hit_end); DRM_MM_BUG_ON(scan->hit_start < hole_start); DRM_MM_BUG_ON(scan->hit_end > hole_end); return true; } EXPORT_SYMBOL(drm_mm_scan_add_block); /** * drm_mm_scan_remove_block - remove a node from the scan list * @scan: the active drm_mm scanner * @node: drm_mm_node to remove * * Nodes **must** be removed in exactly the reverse order from the scan list as * they have been added (e.g. using list_add() as they are added and then * list_for_each() over that eviction list to remove), otherwise the internal * state of the memory manager will be corrupted. * * When the scan list is empty, the selected memory nodes can be freed. An * immediately following drm_mm_insert_node_in_range_generic() or one of the * simpler versions of that function with !DRM_MM_SEARCH_BEST will then return * the just freed block (because it's at the top of the free_stack list). * * Returns: * True if this block should be evicted, false otherwise. Will always * return false when no hole has been found. */ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan, struct drm_mm_node *node) { struct drm_mm_node *prev_node; DRM_MM_BUG_ON(node->mm != scan->mm); DRM_MM_BUG_ON(!drm_mm_node_scanned_block(node)); __clear_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags); DRM_MM_BUG_ON(!node->mm->scan_active); node->mm->scan_active--; /* During drm_mm_scan_add_block() we decoupled this node leaving * its pointers intact. Now that the caller is walking back along * the eviction list we can restore this block into its rightful * place on the full node_list. To confirm that the caller is walking * backwards correctly we check that prev_node->next == node->next, * i.e. both believe the same node should be on the other side of the * hole. */ prev_node = list_prev_entry(node, node_list); DRM_MM_BUG_ON(list_next_entry(prev_node, node_list) != list_next_entry(node, node_list)); list_add(&node->node_list, &prev_node->node_list); return (node->start + node->size > scan->hit_start && node->start < scan->hit_end); } EXPORT_SYMBOL(drm_mm_scan_remove_block); /** * drm_mm_scan_color_evict - evict overlapping nodes on either side of hole * @scan: drm_mm scan with target hole * * After completing an eviction scan and removing the selected nodes, we may * need to remove a few more nodes from either side of the target hole if * mm.color_adjust is being used. * * Returns: * A node to evict, or NULL if there are no overlapping nodes. */ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan) { struct drm_mm *mm = scan->mm; struct drm_mm_node *hole; u64 hole_start, hole_end; DRM_MM_BUG_ON(list_empty(&mm->hole_stack)); if (!mm->color_adjust) return NULL; /* * The hole found during scanning should ideally be the first element * in the hole_stack list, but due to side-effects in the driver it * may not be. */ list_for_each_entry(hole, &mm->hole_stack, hole_stack) { hole_start = __drm_mm_hole_node_start(hole); hole_end = hole_start + hole->hole_size; if (hole_start <= scan->hit_start && hole_end >= scan->hit_end) break; } /* We should only be called after we found the hole previously */ DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack); if (unlikely(&hole->hole_stack == &mm->hole_stack)) return NULL; DRM_MM_BUG_ON(hole_start > scan->hit_start); DRM_MM_BUG_ON(hole_end < scan->hit_end); mm->color_adjust(hole, scan->color, &hole_start, &hole_end); if (hole_start > scan->hit_start) return hole; if (hole_end < scan->hit_end) return list_next_entry(hole, node_list); return NULL; } EXPORT_SYMBOL(drm_mm_scan_color_evict); /** * drm_mm_init - initialize a drm-mm allocator * @mm: the drm_mm structure to initialize * @start: start of the range managed by @mm * @size: end of the range managed by @mm * * Note that @mm must be cleared to 0 before calling this function. */ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size) { DRM_MM_BUG_ON(start + size <= start); mm->color_adjust = NULL; INIT_LIST_HEAD(&mm->hole_stack); mm->interval_tree = RB_ROOT_CACHED; mm->holes_size = RB_ROOT_CACHED; mm->holes_addr = RB_ROOT; /* Clever trick to avoid a special case in the free hole tracking. */ INIT_LIST_HEAD(&mm->head_node.node_list); mm->head_node.flags = 0; mm->head_node.mm = mm; mm->head_node.start = start + size; mm->head_node.size = -size; add_hole(&mm->head_node); mm->scan_active = 0; #ifdef CONFIG_DRM_DEBUG_MM stack_depot_init(); #endif } EXPORT_SYMBOL(drm_mm_init); /** * drm_mm_takedown - clean up a drm_mm allocator * @mm: drm_mm allocator to clean up * * Note that it is a bug to call this function on an allocator which is not * clean. */ void drm_mm_takedown(struct drm_mm *mm) { if (WARN(!drm_mm_clean(mm), "Memory manager not clean during takedown.\n")) show_leaks(mm); } EXPORT_SYMBOL(drm_mm_takedown); static u64 drm_mm_dump_hole(struct drm_printer *p, const struct drm_mm_node *entry) { u64 start, size; size = entry->hole_size; if (size) { start = drm_mm_hole_node_start(entry); drm_printf(p, "%#018llx-%#018llx: %llu: free\n", start, start + size, size); } return size; } /** * drm_mm_print - print allocator state * @mm: drm_mm allocator to print * @p: DRM printer to use */ void drm_mm_print(const struct drm_mm *mm, struct drm_printer *p) { const struct drm_mm_node *entry; u64 total_used = 0, total_free = 0, total = 0; total_free += drm_mm_dump_hole(p, &mm->head_node); drm_mm_for_each_node(entry, mm) { drm_printf(p, "%#018llx-%#018llx: %llu: used\n", entry->start, entry->start + entry->size, entry->size); total_used += entry->size; total_free += drm_mm_dump_hole(p, entry); } total = total_free + total_used; drm_printf(p, "total: %llu, used %llu free %llu\n", total, total_used, total_free); } EXPORT_SYMBOL(drm_mm_print);
96 85 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 /* SPDX-License-Identifier: GPL-2.0+ */ /* * NILFS meta data file prototype and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi. */ #ifndef _NILFS_MDT_H #define _NILFS_MDT_H #include <linux/buffer_head.h> #include <linux/blockgroup_lock.h> #include "nilfs.h" #include "page.h" /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; struct inode *inode; struct list_head frozen_buffers; }; /** * struct nilfs_mdt_info - on-memory private data of meta data files * @mi_sem: reader/writer semaphore for meta data operations * @mi_bgl: per-blockgroup locking * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block * @mi_palloc_cache: persistent object allocator cache * @mi_shadow: shadow of bmap and page caches * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ struct nilfs_mdt_info { struct rw_semaphore mi_sem; struct blockgroup_lock *mi_bgl; unsigned int mi_entry_size; unsigned int mi_first_entry_offset; unsigned long mi_entries_per_block; struct nilfs_palloc_cache *mi_palloc_cache; struct nilfs_shadow_map *mi_shadow; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) { return inode->i_private; } static inline int nilfs_is_metadata_file_inode(const struct inode *inode) { return inode->i_private != NULL; } /* Default GFP flags using highmem */ #define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, struct buffer_head *, void *), struct buffer_head **); int nilfs_mdt_find_block(struct inode *inode, unsigned long start, unsigned long end, unsigned long *blkoff, struct buffer_head **out_bh); int nilfs_mdt_delete_block(struct inode *, unsigned long); int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); void nilfs_mdt_clear(struct inode *inode); void nilfs_mdt_destroy(struct inode *inode); void nilfs_mdt_set_entry_size(struct inode *, unsigned int, unsigned int); int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow); int nilfs_mdt_save_to_shadow_map(struct inode *inode); void nilfs_mdt_restore_from_shadow_map(struct inode *inode); void nilfs_mdt_clear_shadow_map(struct inode *inode); int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh); static inline void nilfs_mdt_mark_dirty(struct inode *inode) { if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state); } static inline void nilfs_mdt_clear_dirty(struct inode *inode) { clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state); } static inline __u64 nilfs_mdt_cno(struct inode *inode) { return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; } static inline spinlock_t * nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group) { return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group); } #endif /* _NILFS_MDT_H */
30 30 30 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* SMBUS message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM #define TRACE_SYSTEM smbus #if !defined(_TRACE_SMBUS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SMBUS_H #include <linux/i2c.h> #include <linux/tracepoint.h> /* * drivers/i2c/i2c-core-smbus.c */ /* * i2c_smbus_xfer() write data or procedure call request */ TRACE_EVENT_CONDITION(smbus_write, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, const union i2c_smbus_data *data), TP_ARGS(adap, addr, flags, read_write, command, protocol, data), TP_CONDITION(read_write == I2C_SMBUS_WRITE || protocol == I2C_SMBUS_PROC_CALL || protocol == I2C_SMBUS_BLOCK_PROC_CALL), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, command ) __field(__u8, len ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; switch (protocol) { case I2C_SMBUS_BYTE_DATA: __entry->len = 1; goto copy; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: __entry->len = 2; goto copy; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: case I2C_SMBUS_I2C_BLOCK_DATA: __entry->len = data->block[0] + 1; copy: memcpy(__entry->buf, data->block, __entry->len); break; case I2C_SMBUS_QUICK: case I2C_SMBUS_BYTE: case I2C_SMBUS_I2C_BLOCK_BROKEN: default: __entry->len = 0; } ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->len, __entry->len, __entry->buf )); /* * i2c_smbus_xfer() read data request */ TRACE_EVENT_CONDITION(smbus_read, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol), TP_ARGS(adap, addr, flags, read_write, command, protocol), TP_CONDITION(!(read_write == I2C_SMBUS_WRITE || protocol == I2C_SMBUS_PROC_CALL || protocol == I2C_SMBUS_BLOCK_PROC_CALL)), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, flags ) __field(__u16, addr ) __field(__u8, command ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }) )); /* * i2c_smbus_xfer() read data or procedure call reply */ TRACE_EVENT_CONDITION(smbus_reply, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, const union i2c_smbus_data *data, int res), TP_ARGS(adap, addr, flags, read_write, command, protocol, data, res), TP_CONDITION(res >= 0 && read_write == I2C_SMBUS_READ), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, command ) __field(__u8, len ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; switch (protocol) { case I2C_SMBUS_BYTE: case I2C_SMBUS_BYTE_DATA: __entry->len = 1; goto copy; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: __entry->len = 2; goto copy; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: case I2C_SMBUS_I2C_BLOCK_DATA: __entry->len = data->block[0] + 1; copy: memcpy(__entry->buf, data->block, __entry->len); break; case I2C_SMBUS_QUICK: case I2C_SMBUS_I2C_BLOCK_BROKEN: default: __entry->len = 0; } ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->len, __entry->len, __entry->buf )); /* * i2c_smbus_xfer() result */ TRACE_EVENT(smbus_result, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, int res), TP_ARGS(adap, addr, flags, read_write, command, protocol, res), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, read_write ) __field(__u8, command ) __field(__s16, res ) __field(__u32, protocol ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->read_write = read_write; __entry->command = command; __entry->protocol = protocol; __entry->res = res; ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd", __entry->res )); #endif /* _TRACE_SMBUS_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 // SPDX-License-Identifier: GPL-2.0 /* * USB ZyXEL omni.net driver * * Copyright (C) 2013,2017 Johan Hovold <johan@kernel.org> * * See Documentation/usb/usb-serial.rst for more information on using this * driver * * Please report both successes and troubles to the author at omninet@kroah.com */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #define DRIVER_AUTHOR "Alessandro Zummo" #define DRIVER_DESC "USB ZyXEL omni.net Driver" #define ZYXEL_VENDOR_ID 0x0586 #define ZYXEL_OMNINET_ID 0x1000 #define ZYXEL_OMNI_56K_PLUS_ID 0x1500 /* This one seems to be a re-branded ZyXEL device */ #define BT_IGNITIONPRO_ID 0x2000 /* function prototypes */ static void omninet_process_read_urb(struct urb *urb); static int omninet_prepare_write_buffer(struct usb_serial_port *port, void *buf, size_t count); static int omninet_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds); static int omninet_port_probe(struct usb_serial_port *port); static void omninet_port_remove(struct usb_serial_port *port); static const struct usb_device_id id_table[] = { { USB_DEVICE(ZYXEL_VENDOR_ID, ZYXEL_OMNINET_ID) }, { USB_DEVICE(ZYXEL_VENDOR_ID, ZYXEL_OMNI_56K_PLUS_ID) }, { USB_DEVICE(ZYXEL_VENDOR_ID, BT_IGNITIONPRO_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver zyxel_omninet_device = { .driver = { .owner = THIS_MODULE, .name = "omninet", }, .description = "ZyXEL - omni.net usb", .id_table = id_table, .num_bulk_out = 2, .calc_num_ports = omninet_calc_num_ports, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, .process_read_urb = omninet_process_read_urb, .prepare_write_buffer = omninet_prepare_write_buffer, }; static struct usb_serial_driver * const serial_drivers[] = { &zyxel_omninet_device, NULL }; /* * The protocol. * * The omni.net always exchange 64 bytes of data with the host. The first * four bytes are the control header. * * oh_seq is a sequence number. Don't know if/how it's used. * oh_len is the length of the data bytes in the packet. * oh_xxx Bit-mapped, related to handshaking and status info. * I normally set it to 0x03 in transmitted frames. * 7: Active when the TA is in a CONNECTed state. * 6: unknown * 5: handshaking, unknown * 4: handshaking, unknown * 3: unknown, usually 0 * 2: unknown, usually 0 * 1: handshaking, unknown, usually set to 1 in transmitted frames * 0: handshaking, unknown, usually set to 1 in transmitted frames * oh_pad Probably a pad byte. * * After the header you will find data bytes if oh_len was greater than zero. */ struct omninet_header { __u8 oh_seq; __u8 oh_len; __u8 oh_xxx; __u8 oh_pad; }; struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; static int omninet_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { /* We need only the second bulk-out for our single-port device. */ epds->bulk_out[0] = epds->bulk_out[1]; epds->num_bulk_out = 1; return 1; } static int omninet_port_probe(struct usb_serial_port *port) { struct omninet_data *od; od = kzalloc(sizeof(*od), GFP_KERNEL); if (!od) return -ENOMEM; usb_set_serial_port_data(port, od); return 0; } static void omninet_port_remove(struct usb_serial_port *port) { struct omninet_data *od; od = usb_get_serial_port_data(port); kfree(od); } #define OMNINET_HEADERLEN 4 #define OMNINET_BULKOUTSIZE 64 #define OMNINET_PAYLOADSIZE (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN) static void omninet_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; const struct omninet_header *hdr = urb->transfer_buffer; const unsigned char *data; size_t data_len; if (urb->actual_length <= OMNINET_HEADERLEN || !hdr->oh_len) return; data = (char *)urb->transfer_buffer + OMNINET_HEADERLEN; data_len = min_t(size_t, urb->actual_length - OMNINET_HEADERLEN, hdr->oh_len); tty_insert_flip_string(&port->port, data, data_len); tty_flip_buffer_push(&port->port); } static int omninet_prepare_write_buffer(struct usb_serial_port *port, void *buf, size_t count) { struct omninet_data *od = usb_get_serial_port_data(port); struct omninet_header *header = buf; count = min_t(size_t, count, OMNINET_PAYLOADSIZE); count = kfifo_out_locked(&port->write_fifo, buf + OMNINET_HEADERLEN, count, &port->lock); header->oh_seq = od->od_outseq++; header->oh_len = count; header->oh_xxx = 0x03; header->oh_pad = 0x00; /* always 64 bytes */ return OMNINET_BULKOUTSIZE; } module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
2 4777 4113 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_USER_NAMESPACE_H #define _LINUX_USER_NAMESPACE_H #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/rwsem.h> #include <linux/sysctl.h> #include <linux/err.h> #define UID_GID_MAP_MAX_BASE_EXTENTS 5 #define UID_GID_MAP_MAX_EXTENTS 340 struct uid_gid_extent { u32 first; u32 lower_first; u32 count; }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ union { struct { struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; u32 nr_extents; }; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; }; }; }; #define USERNS_SETGROUPS_ALLOWED 1UL #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED struct ucounts; enum ucount_type { UCOUNT_USER_NAMESPACES, UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, UCOUNT_NET_NAMESPACES, UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, UCOUNT_TIME_NAMESPACES, #ifdef CONFIG_INOTIFY_USER UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, #endif #ifdef CONFIG_FANOTIFY UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_MARKS, #endif UCOUNT_COUNTS, }; enum rlimit_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_MEMLOCK, UCOUNT_RLIMIT_COUNTS, }; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc; #endif struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; struct user_namespace *parent; int level; kuid_t owner; kgid_t group; struct ns_common ns; unsigned long flags; /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP * in its effective capability set at the child ns creation time. */ bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of * these pointers is controlled by keyring_sem. Once * user_keyring_register is set, it won't be changed, so it can be * accessed directly with READ_ONCE(). */ struct list_head keyring_name_list; struct key *user_keyring_register; struct rw_semaphore keyring_sem; #endif /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; #endif struct work_struct work; #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc *binfmt_misc; #endif } __randomize_layout; struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); } long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) { return READ_ONCE(ns->rlimit_max[type]); } static inline void set_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type, unsigned long max) { ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX; } #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); return ns; } extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } struct seq_operations; extern const struct seq_operations proc_uid_seq_operations; extern const struct seq_operations proc_gid_seq_operations; extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; } static inline int create_user_ns(struct cred *new) { return -EINVAL; } static inline int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { if (unshare_flags & CLONE_NEWUSER) return -EINVAL; return 0; } static inline void put_user_ns(struct user_namespace *ns) { } static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } static inline bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { return true; } static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } static inline struct ns_common *ns_get_owner(struct ns_common *ns) { return ERR_PTR(-EPERM); } #endif #endif /* _LINUX_USER_H */
2888 3316 3255 2971 3333 2999 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpm #if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RUNTIME_POWER_H #include <linux/ktime.h> #include <linux/tracepoint.h> struct device; /* * The rpm_internal events are used for tracing some important * runtime pm internal functions. */ DECLARE_EVENT_CLASS(rpm_internal, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags), TP_STRUCT__entry( __string( name, dev_name(dev) ) __field( int, flags ) __field( int , usage_count ) __field( int , disable_depth ) __field( int , runtime_auto ) __field( int , request_pending ) __field( int , irq_safe ) __field( int , child_count ) ), TP_fast_assign( __assign_str(name); __entry->flags = flags; __entry->usage_count = atomic_read( &dev->power.usage_count); __entry->disable_depth = dev->power.disable_depth; __entry->runtime_auto = dev->power.runtime_auto; __entry->request_pending = dev->power.request_pending; __entry->irq_safe = dev->power.irq_safe; __entry->child_count = atomic_read( &dev->power.child_count); ), TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d" " irq-%-1d child-%d", __get_str(name), __entry->flags, __entry->usage_count, __entry->disable_depth, __entry->runtime_auto, __entry->request_pending, __entry->irq_safe, __entry->child_count ) ); DEFINE_EVENT(rpm_internal, rpm_suspend, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_resume, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_idle, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_usage, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); TRACE_EVENT(rpm_return_int, TP_PROTO(struct device *dev, unsigned long ip, int ret), TP_ARGS(dev, ip, ret), TP_STRUCT__entry( __string( name, dev_name(dev)) __field( unsigned long, ip ) __field( int, ret ) ), TP_fast_assign( __assign_str(name); __entry->ip = ip; __entry->ret = ret; ), TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name), __entry->ret) ); #define RPM_STATUS_STRINGS \ EM(RPM_INVALID, "RPM_INVALID") \ EM(RPM_ACTIVE, "RPM_ACTIVE") \ EM(RPM_RESUMING, "RPM_RESUMING") \ EM(RPM_SUSPENDED, "RPM_SUSPENDED") \ EMe(RPM_SUSPENDING, "RPM_SUSPENDING") /* Enums require being exported to userspace, for user tool parsing. */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); RPM_STATUS_STRINGS /* * Now redefine the EM() and EMe() macros to map the enums to the strings that * will be printed in the output. */ #undef EM #undef EMe #define EM(a, b) { a, b }, #define EMe(a, b) { a, b } TRACE_EVENT(rpm_status, TP_PROTO(struct device *dev, enum rpm_status status), TP_ARGS(dev, status), TP_STRUCT__entry( __string(name, dev_name(dev)) __field(int, status) ), TP_fast_assign( __assign_str(name); __entry->status = status; ), TP_printk("%s status=%s", __get_str(name), __print_symbolic(__entry->status, RPM_STATUS_STRINGS)) ); #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_RTBITMAP_H__ #define __XFS_RTBITMAP_H__ struct xfs_rtalloc_args { struct xfs_mount *mp; struct xfs_trans *tp; struct xfs_buf *rbmbp; /* bitmap block buffer */ struct xfs_buf *sumbp; /* summary block buffer */ xfs_fileoff_t rbmoff; /* bitmap block number */ xfs_fileoff_t sumoff; /* summary block number */ }; static inline xfs_rtblock_t xfs_rtx_to_rtb( struct xfs_mount *mp, xfs_rtxnum_t rtx) { if (mp->m_rtxblklog >= 0) return rtx << mp->m_rtxblklog; return rtx * mp->m_sb.sb_rextsize; } static inline xfs_extlen_t xfs_rtxlen_to_extlen( struct xfs_mount *mp, xfs_rtxlen_t rtxlen) { if (mp->m_rtxblklog >= 0) return rtxlen << mp->m_rtxblklog; return rtxlen * mp->m_sb.sb_rextsize; } /* Compute the misalignment between an extent length and a realtime extent .*/ static inline unsigned int xfs_extlen_to_rtxmod( struct xfs_mount *mp, xfs_extlen_t len) { if (mp->m_rtxblklog >= 0) return len & mp->m_rtxblkmask; return len % mp->m_sb.sb_rextsize; } static inline xfs_rtxlen_t xfs_extlen_to_rtxlen( struct xfs_mount *mp, xfs_extlen_t len) { if (mp->m_rtxblklog >= 0) return len >> mp->m_rtxblklog; return len / mp->m_sb.sb_rextsize; } /* Convert an rt block number into an rt extent number. */ static inline xfs_rtxnum_t xfs_rtb_to_rtx( struct xfs_mount *mp, xfs_rtblock_t rtbno) { if (likely(mp->m_rtxblklog >= 0)) return rtbno >> mp->m_rtxblklog; return div_u64(rtbno, mp->m_sb.sb_rextsize); } /* Return the offset of an rt block number within an rt extent. */ static inline xfs_extlen_t xfs_rtb_to_rtxoff( struct xfs_mount *mp, xfs_rtblock_t rtbno) { if (likely(mp->m_rtxblklog >= 0)) return rtbno & mp->m_rtxblkmask; return do_div(rtbno, mp->m_sb.sb_rextsize); } /* * Crack an rt block number into an rt extent number and an offset within that * rt extent. Returns the rt extent number directly and the offset in @off. */ static inline xfs_rtxnum_t xfs_rtb_to_rtxrem( struct xfs_mount *mp, xfs_rtblock_t rtbno, xfs_extlen_t *off) { if (likely(mp->m_rtxblklog >= 0)) { *off = rtbno & mp->m_rtxblkmask; return rtbno >> mp->m_rtxblklog; } return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off); } /* * Convert an rt block number into an rt extent number, rounding up to the next * rt extent if the rt block is not aligned to an rt extent boundary. */ static inline xfs_rtxnum_t xfs_rtb_to_rtxup( struct xfs_mount *mp, xfs_rtblock_t rtbno) { if (likely(mp->m_rtxblklog >= 0)) { if (rtbno & mp->m_rtxblkmask) return (rtbno >> mp->m_rtxblklog) + 1; return rtbno >> mp->m_rtxblklog; } if (do_div(rtbno, mp->m_sb.sb_rextsize)) rtbno++; return rtbno; } /* Round this rtblock up to the nearest rt extent size. */ static inline xfs_rtblock_t xfs_rtb_roundup_rtx( struct xfs_mount *mp, xfs_rtblock_t rtbno) { return roundup_64(rtbno, mp->m_sb.sb_rextsize); } /* Round this rtblock down to the nearest rt extent size. */ static inline xfs_rtblock_t xfs_rtb_rounddown_rtx( struct xfs_mount *mp, xfs_rtblock_t rtbno) { return rounddown_64(rtbno, mp->m_sb.sb_rextsize); } /* Convert an rt extent number to a file block offset in the rt bitmap file. */ static inline xfs_fileoff_t xfs_rtx_to_rbmblock( struct xfs_mount *mp, xfs_rtxnum_t rtx) { return rtx >> mp->m_blkbit_log; } /* Convert an rt extent number to a word offset within an rt bitmap block. */ static inline unsigned int xfs_rtx_to_rbmword( struct xfs_mount *mp, xfs_rtxnum_t rtx) { return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1); } /* Convert a file block offset in the rt bitmap file to an rt extent number. */ static inline xfs_rtxnum_t xfs_rbmblock_to_rtx( struct xfs_mount *mp, xfs_fileoff_t rbmoff) { return rbmoff << mp->m_blkbit_log; } /* Return a pointer to a bitmap word within a rt bitmap block. */ static inline union xfs_rtword_raw * xfs_rbmblock_wordptr( struct xfs_rtalloc_args *args, unsigned int index) { union xfs_rtword_raw *words = args->rbmbp->b_addr; return words + index; } /* Convert an ondisk bitmap word to its incore representation. */ static inline xfs_rtword_t xfs_rtbitmap_getword( struct xfs_rtalloc_args *args, unsigned int index) { union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); return word->old; } /* Set an ondisk bitmap word from an incore representation. */ static inline void xfs_rtbitmap_setword( struct xfs_rtalloc_args *args, unsigned int index, xfs_rtword_t value) { union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); word->old = value; } /* * Convert a rt extent length and rt bitmap block number to a xfs_suminfo_t * offset within the rt summary file. */ static inline xfs_rtsumoff_t xfs_rtsumoffs( struct xfs_mount *mp, int log2_len, xfs_fileoff_t rbmoff) { return log2_len * mp->m_sb.sb_rbmblocks + rbmoff; } /* * Convert an xfs_suminfo_t offset to a file block offset within the rt summary * file. */ static inline xfs_fileoff_t xfs_rtsumoffs_to_block( struct xfs_mount *mp, xfs_rtsumoff_t rsumoff) { return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t)); } /* * Convert an xfs_suminfo_t offset to an info word offset within an rt summary * block. */ static inline unsigned int xfs_rtsumoffs_to_infoword( struct xfs_mount *mp, xfs_rtsumoff_t rsumoff) { unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG; return rsumoff & mask; } /* Return a pointer to a summary info word within a rt summary block. */ static inline union xfs_suminfo_raw * xfs_rsumblock_infoptr( struct xfs_rtalloc_args *args, unsigned int index) { union xfs_suminfo_raw *info = args->sumbp->b_addr; return info + index; } /* Get the current value of a summary counter. */ static inline xfs_suminfo_t xfs_suminfo_get( struct xfs_rtalloc_args *args, unsigned int index) { union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); return info->old; } /* Add to the current value of a summary counter and return the new value. */ static inline xfs_suminfo_t xfs_suminfo_add( struct xfs_rtalloc_args *args, unsigned int index, int delta) { union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); info->old += delta; return info->old; } /* * Functions for walking free space rtextents in the realtime bitmap. */ struct xfs_rtalloc_rec { xfs_rtxnum_t ar_startext; xfs_rtbxlen_t ar_extcount; }; typedef int (*xfs_rtalloc_query_range_fn)( struct xfs_mount *mp, struct xfs_trans *tp, const struct xfs_rtalloc_rec *rec, void *priv); #ifdef CONFIG_XFS_RT void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args); int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block, int issum); static inline int xfs_rtbitmap_read_buf( struct xfs_rtalloc_args *args, xfs_fileoff_t block) { return xfs_rtbuf_get(args, block, 0); } static inline int xfs_rtsummary_read_buf( struct xfs_rtalloc_args *args, xfs_fileoff_t block) { return xfs_rtbuf_get(args, block, 1); } int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat); int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len, int val); int xfs_rtget_summary(struct xfs_rtalloc_args *args, int log, xfs_fileoff_t bbno, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, xfs_fileoff_t bbno, int delta); int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len); int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, const struct xfs_rtalloc_rec *low_rec, const struct xfs_rtalloc_rec *high_rec, xfs_rtalloc_query_range_fn fn, void *priv); int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, void *priv); int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtxnum_t start, xfs_rtxlen_t len, bool *is_free); /* * Free an extent in the realtime subvolume. Length is expressed in * realtime extents, as is the block number. */ int /* error */ xfs_rtfree_extent( struct xfs_trans *tp, /* transaction pointer */ xfs_rtxnum_t start, /* starting rtext number to free */ xfs_rtxlen_t len); /* length of extent freed */ /* Same as above, but in units of rt blocks. */ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, unsigned int rsumlevels, xfs_extlen_t rbmblocks); unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, unsigned int rsumlevels, xfs_extlen_t rbmblocks); void xfs_rtbitmap_lock(struct xfs_trans *tp, struct xfs_mount *mp); void xfs_rtbitmap_unlock(struct xfs_mount *mp); /* Lock the rt bitmap inode in shared mode */ #define XFS_RBMLOCK_BITMAP (1U << 0) /* Lock the rt summary inode in shared mode */ #define XFS_RBMLOCK_SUMMARY (1U << 1) void xfs_rtbitmap_lock_shared(struct xfs_mount *mp, unsigned int rbmlock_flags); void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp, unsigned int rbmlock_flags); #else /* CONFIG_XFS_RT */ # define xfs_rtfree_extent(t,b,l) (-ENOSYS) # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) # define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) # define xfs_rtbitmap_read_buf(a,b) (-ENOSYS) # define xfs_rtsummary_read_buf(a,b) (-ENOSYS) # define xfs_rtbuf_cache_relse(a) (0) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) static inline xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { /* shut up gcc */ return 0; } # define xfs_rtbitmap_wordcount(mp, r) (0) # define xfs_rtsummary_blockcount(mp, l, b) (0) # define xfs_rtsummary_wordcount(mp, l, b) (0) # define xfs_rtbitmap_lock(tp, mp) do { } while (0) # define xfs_rtbitmap_unlock(mp) do { } while (0) # define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0) # define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTBITMAP_H__ */
121 26 21 29 21 40 54 15 45 111 111 342 42 280 59 12 26 14 45 29 1 29 32 1 4 2 29 21 13 2 12 10 200 60 104 28 6 14 4 13 1 1 1 4 9 9 1 1 1 1 5 2 2 40 3 20 20 14 14 5 4 12 11 35 13 15 35 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 // SPDX-License-Identifier: GPL-2.0-only /* * Longest prefix match list implementation * * Copyright (c) 2016,2017 Daniel Mack * Copyright (c) 2016 David Herrmann */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <net/ipv6.h> #include <uapi/linux/btf.h> #include <linux/btf_ids.h> /* Intermediate node */ #define LPM_TREE_NODE_FLAG_IM BIT(0) struct lpm_trie_node; struct lpm_trie_node { struct rcu_head rcu; struct lpm_trie_node __rcu *child[2]; u32 prefixlen; u32 flags; u8 data[]; }; struct lpm_trie { struct bpf_map map; struct lpm_trie_node __rcu *root; size_t n_entries; size_t max_prefixlen; size_t data_size; spinlock_t lock; }; /* This trie implements a longest prefix match algorithm that can be used to * match IP addresses to a stored set of ranges. * * Data stored in @data of struct bpf_lpm_key and struct lpm_trie_node is * interpreted as big endian, so data[0] stores the most significant byte. * * Match ranges are internally stored in instances of struct lpm_trie_node * which each contain their prefix length as well as two pointers that may * lead to more nodes containing more specific matches. Each node also stores * a value that is defined by and returned to userspace via the update_elem * and lookup functions. * * For instance, let's start with a trie that was created with a prefix length * of 32, so it can be used for IPv4 addresses, and one single element that * matches 192.168.0.0/16. The data array would hence contain * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will * stick to IP-address notation for readability though. * * As the trie is empty initially, the new node (1) will be places as root * node, denoted as (R) in the example below. As there are no other node, both * child pointers are %NULL. * * +----------------+ * | (1) (R) | * | 192.168.0.0/16 | * | value: 1 | * | [0] [1] | * +----------------+ * * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already * a node with the same data and a smaller prefix (ie, a less specific one), * node (2) will become a child of (1). In child index depends on the next bit * that is outside of what (1) matches, and that bit is 0, so (2) will be * child[0] of (1): * * +----------------+ * | (1) (R) | * | 192.168.0.0/16 | * | value: 1 | * | [0] [1] | * +----------------+ * | * +----------------+ * | (2) | * | 192.168.0.0/24 | * | value: 2 | * | [0] [1] | * +----------------+ * * The child[1] slot of (1) could be filled with another node which has bit #17 * (the next bit after the ones that (1) matches on) set to 1. For instance, * 192.168.128.0/24: * * +----------------+ * | (1) (R) | * | 192.168.0.0/16 | * | value: 1 | * | [0] [1] | * +----------------+ * | | * +----------------+ +------------------+ * | (2) | | (3) | * | 192.168.0.0/24 | | 192.168.128.0/24 | * | value: 2 | | value: 3 | * | [0] [1] | | [0] [1] | * +----------------+ +------------------+ * * Let's add another node (4) to the game for 192.168.1.0/24. In order to place * it, node (1) is looked at first, and because (4) of the semantics laid out * above (bit #17 is 0), it would normally be attached to (1) as child[0]. * However, that slot is already allocated, so a new node is needed in between. * That node does not have a value attached to it and it will never be * returned to users as result of a lookup. It is only there to differentiate * the traversal further. It will get a prefix as wide as necessary to * distinguish its two children: * * +----------------+ * | (1) (R) | * | 192.168.0.0/16 | * | value: 1 | * | [0] [1] | * +----------------+ * | | * +----------------+ +------------------+ * | (4) (I) | | (3) | * | 192.168.0.0/23 | | 192.168.128.0/24 | * | value: --- | | value: 3 | * | [0] [1] | | [0] [1] | * +----------------+ +------------------+ * | | * +----------------+ +----------------+ * | (2) | | (5) | * | 192.168.0.0/24 | | 192.168.1.0/24 | * | value: 2 | | value: 5 | * | [0] [1] | | [0] [1] | * +----------------+ +----------------+ * * 192.168.1.1/32 would be a child of (5) etc. * * An intermediate node will be turned into a 'real' node on demand. In the * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie. * * A fully populated trie would have a height of 32 nodes, as the trie was * created with a prefix length of 32. * * The lookup starts at the root node. If the current node matches and if there * is a child that can be used to become more specific, the trie is traversed * downwards. The last node in the traversal that is a non-intermediate one is * returned. */ static inline int extract_bit(const u8 *data, size_t index) { return !!(data[index / 8] & (1 << (7 - (index % 8)))); } /** * __longest_prefix_match() - determine the longest prefix * @trie: The trie to get internal sizes from * @node: The node to operate on * @key: The key to compare to @node * * Determine the longest prefix of @node that matches the bits in @key. */ static __always_inline size_t __longest_prefix_match(const struct lpm_trie *trie, const struct lpm_trie_node *node, const struct bpf_lpm_trie_key_u8 *key) { u32 limit = min(node->prefixlen, key->prefixlen); u32 prefixlen = 0, i = 0; BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32)); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) /* data_size >= 16 has very small probability. * We do not use a loop for optimal code generation. */ if (trie->data_size >= 8) { u64 diff = be64_to_cpu(*(__be64 *)node->data ^ *(__be64 *)key->data); prefixlen = 64 - fls64(diff); if (prefixlen >= limit) return limit; if (diff) return prefixlen; i = 8; } #endif while (trie->data_size >= i + 4) { u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^ *(__be32 *)&key->data[i]); prefixlen += 32 - fls(diff); if (prefixlen >= limit) return limit; if (diff) return prefixlen; i += 4; } if (trie->data_size >= i + 2) { u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ *(__be16 *)&key->data[i]); prefixlen += 16 - fls(diff); if (prefixlen >= limit) return limit; if (diff) return prefixlen; i += 2; } if (trie->data_size >= i + 1) { prefixlen += 8 - fls(node->data[i] ^ key->data[i]); if (prefixlen >= limit) return limit; } return prefixlen; } static size_t longest_prefix_match(const struct lpm_trie *trie, const struct lpm_trie_node *node, const struct bpf_lpm_trie_key_u8 *key) { return __longest_prefix_match(trie, node, key); } /* Called from syscall or from eBPF program */ static void *trie_lookup_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *found = NULL; struct bpf_lpm_trie_key_u8 *key = _key; if (key->prefixlen > trie->max_prefixlen) return NULL; /* Start walking the trie from the root node ... */ for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held()); node;) { unsigned int next_bit; size_t matchlen; /* Determine the longest prefix of @node that matches @key. * If it's the maximum possible prefix for this trie, we have * an exact match and can return it directly. */ matchlen = __longest_prefix_match(trie, node, key); if (matchlen == trie->max_prefixlen) { found = node; break; } /* If the number of bits that match is smaller than the prefix * length of @node, bail out and return the node we have seen * last in the traversal (ie, the parent). */ if (matchlen < node->prefixlen) break; /* Consider this node as return candidate unless it is an * artificially added intermediate one. */ if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) found = node; /* If the node match is fully satisfied, let's see if we can * become more specific. Determine the next bit in the key and * traverse down. */ next_bit = extract_bit(key->data, node->prefixlen); node = rcu_dereference_check(node->child[next_bit], rcu_read_lock_bh_held()); } if (!found) return NULL; return found->data + trie->data_size; } static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, const void *value) { struct lpm_trie_node *node; size_t size = sizeof(struct lpm_trie_node) + trie->data_size; if (value) size += trie->map.value_size; node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN, trie->map.numa_node); if (!node) return NULL; node->flags = 0; if (value) memcpy(node->data + trie->data_size, value, trie->map.value_size); return node; } /* Called from syscall or from eBPF program */ static long trie_update_elem(struct bpf_map *map, void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; struct lpm_trie_node *free_node = NULL; struct lpm_trie_node __rcu **slot; struct bpf_lpm_trie_key_u8 *key = _key; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; int ret = 0; if (unlikely(flags > BPF_EXIST)) return -EINVAL; if (key->prefixlen > trie->max_prefixlen) return -EINVAL; spin_lock_irqsave(&trie->lock, irq_flags); /* Allocate and fill a new node */ if (trie->n_entries == trie->map.max_entries) { ret = -ENOSPC; goto out; } new_node = lpm_trie_node_alloc(trie, value); if (!new_node) { ret = -ENOMEM; goto out; } trie->n_entries++; new_node->prefixlen = key->prefixlen; RCU_INIT_POINTER(new_node->child[0], NULL); RCU_INIT_POINTER(new_node->child[1], NULL); memcpy(new_node->data, key->data, trie->data_size); /* Now find a slot to attach the new node. To do that, walk the tree * from the root and match as many bits as possible for each node until * we either find an empty slot or a slot that needs to be replaced by * an intermediate node. */ slot = &trie->root; while ((node = rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)))) { matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || node->prefixlen == key->prefixlen || node->prefixlen == trie->max_prefixlen) break; next_bit = extract_bit(key->data, node->prefixlen); slot = &node->child[next_bit]; } /* If the slot is empty (a free child pointer or an empty root), * simply assign the @new_node to that slot and be done. */ if (!node) { rcu_assign_pointer(*slot, new_node); goto out; } /* If the slot we picked already exists, replace it with @new_node * which already has the correct data array set. */ if (node->prefixlen == matchlen) { new_node->child[0] = node->child[0]; new_node->child[1] = node->child[1]; if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) trie->n_entries--; rcu_assign_pointer(*slot, new_node); free_node = node; goto out; } /* If the new node matches the prefix completely, it must be inserted * as an ancestor. Simply insert it between @node and *@slot. */ if (matchlen == key->prefixlen) { next_bit = extract_bit(node->data, matchlen); rcu_assign_pointer(new_node->child[next_bit], node); rcu_assign_pointer(*slot, new_node); goto out; } im_node = lpm_trie_node_alloc(trie, NULL); if (!im_node) { ret = -ENOMEM; goto out; } im_node->prefixlen = matchlen; im_node->flags |= LPM_TREE_NODE_FLAG_IM; memcpy(im_node->data, node->data, trie->data_size); /* Now determine which child to install in which slot */ if (extract_bit(key->data, matchlen)) { rcu_assign_pointer(im_node->child[0], node); rcu_assign_pointer(im_node->child[1], new_node); } else { rcu_assign_pointer(im_node->child[0], new_node); rcu_assign_pointer(im_node->child[1], node); } /* Finally, assign the intermediate node to the determined slot */ rcu_assign_pointer(*slot, im_node); out: if (ret) { if (new_node) trie->n_entries--; kfree(new_node); kfree(im_node); } spin_unlock_irqrestore(&trie->lock, irq_flags); kfree_rcu(free_node, rcu); return ret; } /* Called from syscall or from eBPF program */ static long trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *free_node = NULL, *free_parent = NULL; struct bpf_lpm_trie_key_u8 *key = _key; struct lpm_trie_node __rcu **trim, **trim2; struct lpm_trie_node *node, *parent; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; int ret = 0; if (key->prefixlen > trie->max_prefixlen) return -EINVAL; spin_lock_irqsave(&trie->lock, irq_flags); /* Walk the tree looking for an exact key/length match and keeping * track of the path we traverse. We will need to know the node * we wish to delete, and the slot that points to the node we want * to delete. We may also need to know the nodes parent and the * slot that contains it. */ trim = &trie->root; trim2 = trim; parent = NULL; while ((node = rcu_dereference_protected( *trim, lockdep_is_held(&trie->lock)))) { matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || node->prefixlen == key->prefixlen) break; parent = node; trim2 = trim; next_bit = extract_bit(key->data, node->prefixlen); trim = &node->child[next_bit]; } if (!node || node->prefixlen != key->prefixlen || node->prefixlen != matchlen || (node->flags & LPM_TREE_NODE_FLAG_IM)) { ret = -ENOENT; goto out; } trie->n_entries--; /* If the node we are removing has two children, simply mark it * as intermediate and we are done. */ if (rcu_access_pointer(node->child[0]) && rcu_access_pointer(node->child[1])) { node->flags |= LPM_TREE_NODE_FLAG_IM; goto out; } /* If the parent of the node we are about to delete is an intermediate * node, and the deleted node doesn't have any children, we can delete * the intermediate parent as well and promote its other child * up the tree. Doing this maintains the invariant that all * intermediate nodes have exactly 2 children and that there are no * unnecessary intermediate nodes in the tree. */ if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && !node->child[0] && !node->child[1]) { if (node == rcu_access_pointer(parent->child[0])) rcu_assign_pointer( *trim2, rcu_access_pointer(parent->child[1])); else rcu_assign_pointer( *trim2, rcu_access_pointer(parent->child[0])); free_parent = parent; free_node = node; goto out; } /* The node we are removing has either zero or one child. If there * is a child, move it into the removed node's slot then delete * the node. Otherwise just clear the slot and delete the node. */ if (node->child[0]) rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); else if (node->child[1]) rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); else RCU_INIT_POINTER(*trim, NULL); free_node = node; out: spin_unlock_irqrestore(&trie->lock, irq_flags); kfree_rcu(free_parent, rcu); kfree_rcu(free_node, rcu); return ret; } #define LPM_DATA_SIZE_MAX 256 #define LPM_DATA_SIZE_MIN 1 #define LPM_VAL_SIZE_MAX (KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \ sizeof(struct lpm_trie_node)) #define LPM_VAL_SIZE_MIN 1 #define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X)) #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) #define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ BPF_F_ACCESS_MASK) static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; /* check sanity of attributes */ if (attr->max_entries == 0 || !(attr->map_flags & BPF_F_NO_PREALLOC) || attr->map_flags & ~LPM_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || attr->key_size < LPM_KEY_SIZE_MIN || attr->key_size > LPM_KEY_SIZE_MAX || attr->value_size < LPM_VAL_SIZE_MIN || attr->value_size > LPM_VAL_SIZE_MAX) return ERR_PTR(-EINVAL); trie = bpf_map_area_alloc(sizeof(*trie), NUMA_NO_NODE); if (!trie) return ERR_PTR(-ENOMEM); /* copy mandatory map attributes */ bpf_map_init_from_attr(&trie->map, attr); trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key_u8, data); trie->max_prefixlen = trie->data_size * 8; spin_lock_init(&trie->lock); return &trie->map; } static void trie_free(struct bpf_map *map) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node __rcu **slot; struct lpm_trie_node *node; /* Always start at the root and walk down to a node that has no * children. Then free that node, nullify its reference in the parent * and start over. */ for (;;) { slot = &trie->root; for (;;) { node = rcu_dereference_protected(*slot, 1); if (!node) goto out; if (rcu_access_pointer(node->child[0])) { slot = &node->child[0]; continue; } if (rcu_access_pointer(node->child[1])) { slot = &node->child[1]; continue; } kfree(node); RCU_INIT_POINTER(*slot, NULL); break; } } out: bpf_map_area_free(trie); } static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) { struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key; struct lpm_trie_node **node_stack = NULL; int err = 0, stack_ptr = -1; unsigned int next_bit; size_t matchlen; /* The get_next_key follows postorder. For the 4 node example in * the top of this file, the trie_get_next_key() returns the following * one after another: * 192.168.0.0/24 * 192.168.1.0/24 * 192.168.128.0/24 * 192.168.0.0/16 * * The idea is to return more specific keys before less specific ones. */ /* Empty trie */ search_root = rcu_dereference(trie->root); if (!search_root) return -ENOENT; /* For invalid key, find the leftmost node in the trie */ if (!key || key->prefixlen > trie->max_prefixlen) goto find_leftmost; node_stack = kmalloc_array(trie->max_prefixlen, sizeof(struct lpm_trie_node *), GFP_ATOMIC | __GFP_NOWARN); if (!node_stack) return -ENOMEM; /* Try to find the exact node for the given key */ for (node = search_root; node;) { node_stack[++stack_ptr] = node; matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || node->prefixlen == key->prefixlen) break; next_bit = extract_bit(key->data, node->prefixlen); node = rcu_dereference(node->child[next_bit]); } if (!node || node->prefixlen != key->prefixlen || (node->flags & LPM_TREE_NODE_FLAG_IM)) goto find_leftmost; /* The node with the exactly-matching key has been found, * find the first node in postorder after the matched node. */ node = node_stack[stack_ptr]; while (stack_ptr > 0) { parent = node_stack[stack_ptr - 1]; if (rcu_dereference(parent->child[0]) == node) { search_root = rcu_dereference(parent->child[1]); if (search_root) goto find_leftmost; } if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { next_node = parent; goto do_copy; } node = parent; stack_ptr--; } /* did not find anything */ err = -ENOENT; goto free_stack; find_leftmost: /* Find the leftmost non-intermediate node, all intermediate nodes * have exact two children, so this function will never return NULL. */ for (node = search_root; node;) { if (node->flags & LPM_TREE_NODE_FLAG_IM) { node = rcu_dereference(node->child[0]); } else { next_node = node; node = rcu_dereference(node->child[0]); if (!node) node = rcu_dereference(next_node->child[1]); } } do_copy: next_key->prefixlen = next_node->prefixlen; memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data), next_node->data, trie->data_size); free_stack: kfree(node_stack); return err; } static int trie_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? -EINVAL : 0; } static u64 trie_mem_usage(const struct bpf_map *map) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); u64 elem_size; elem_size = sizeof(struct lpm_trie_node) + trie->data_size + trie->map.value_size; return elem_size * READ_ONCE(trie->n_entries); } BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie) const struct bpf_map_ops trie_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = trie_alloc, .map_free = trie_free, .map_get_next_key = trie_get_next_key, .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_delete_batch = generic_map_delete_batch, .map_check_btf = trie_check_btf, .map_mem_usage = trie_mem_usage, .map_btf_id = &trie_map_btf_ids[0], };
7 7 7 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 // SPDX-License-Identifier: GPL-2.0 /* * transport_class.c - implementation of generic transport classes * using attribute_containers * * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com> * * The basic idea here is to allow any "device controller" (which * would most often be a Host Bus Adapter to use the services of one * or more tranport classes for performing transport specific * services. Transport specific services are things that the generic * command layer doesn't want to know about (speed settings, line * condidtioning, etc), but which the user might be interested in. * Thus, the HBA's use the routines exported by the transport classes * to perform these functions. The transport classes export certain * values to the user via sysfs using attribute containers. * * Note: because not every HBA will care about every transport * attribute, there's a many to one relationship that goes like this: * * transport class<-----attribute container<----class device * * Usually the attribute container is per-HBA, but the design doesn't * mandate that. Although most of the services will be specific to * the actual external storage connection used by the HBA, the generic * transport class is framed entirely in terms of generic devices to * allow it to be used by any physical HBA in the system. */ #include <linux/export.h> #include <linux/attribute_container.h> #include <linux/transport_class.h> static int transport_remove_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev); /** * transport_class_register - register an initial transport class * * @tclass: a pointer to the transport class structure to be initialised * * The transport class contains an embedded class which is used to * identify it. The caller should initialise this structure with * zeros and then generic class must have been initialised with the * actual transport class unique name. There's a macro * DECLARE_TRANSPORT_CLASS() to do this (declared classes still must * be registered). * * Returns 0 on success or error on failure. */ int transport_class_register(struct transport_class *tclass) { return class_register(&tclass->class); } EXPORT_SYMBOL_GPL(transport_class_register); /** * transport_class_unregister - unregister a previously registered class * * @tclass: The transport class to unregister * * Must be called prior to deallocating the memory for the transport * class. */ void transport_class_unregister(struct transport_class *tclass) { class_unregister(&tclass->class); } EXPORT_SYMBOL_GPL(transport_class_unregister); static int anon_transport_dummy_function(struct transport_container *tc, struct device *dev, struct device *cdev) { /* do nothing */ return 0; } /** * anon_transport_class_register - register an anonymous class * * @atc: The anon transport class to register * * The anonymous transport class contains both a transport class and a * container. The idea of an anonymous class is that it never * actually has any device attributes associated with it (and thus * saves on container storage). So it can only be used for triggering * events. Use prezero and then use DECLARE_ANON_TRANSPORT_CLASS() to * initialise the anon transport class storage. */ int anon_transport_class_register(struct anon_transport_class *atc) { int error; atc->container.class = &atc->tclass.class; attribute_container_set_no_classdevs(&atc->container); error = attribute_container_register(&atc->container); if (error) return error; atc->tclass.setup = anon_transport_dummy_function; atc->tclass.remove = anon_transport_dummy_function; return 0; } EXPORT_SYMBOL_GPL(anon_transport_class_register); /** * anon_transport_class_unregister - unregister an anon class * * @atc: Pointer to the anon transport class to unregister * * Must be called prior to deallocating the memory for the anon * transport class. */ void anon_transport_class_unregister(struct anon_transport_class *atc) { if (unlikely(attribute_container_unregister(&atc->container))) BUG(); } EXPORT_SYMBOL_GPL(anon_transport_class_unregister); static int transport_setup_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->setup) tclass->setup(tcont, dev, classdev); return 0; } /** * transport_setup_device - declare a new dev for transport class association but don't make it visible yet. * @dev: the generic device representing the entity being added * * Usually, dev represents some component in the HBA system (either * the HBA itself or a device remote across the HBA bus). This * routine is simply a trigger point to see if any set of transport * classes wishes to associate with the added device. This allocates * storage for the class device and initialises it, but does not yet * add it to the system or add attributes to it (you do this with * transport_add_device). If you have no need for a separate setup * and add operations, use transport_register_device (see * transport_class.h). */ void transport_setup_device(struct device *dev) { attribute_container_add_device(dev, transport_setup_classdev); } EXPORT_SYMBOL_GPL(transport_setup_device); static int transport_add_class_device(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); int error = attribute_container_add_class_device(classdev); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (error) goto err_remove; if (tcont->statistics) { error = sysfs_create_group(&classdev->kobj, tcont->statistics); if (error) goto err_del; } return 0; err_del: attribute_container_class_device_del(classdev); err_remove: if (tclass->remove) tclass->remove(tcont, dev, classdev); return error; } /** * transport_add_device - declare a new dev for transport class association * * @dev: the generic device representing the entity being added * * Usually, dev represents some component in the HBA system (either * the HBA itself or a device remote across the HBA bus). This * routine is simply a trigger point used to add the device to the * system and register attributes for it. */ int transport_add_device(struct device *dev) { return attribute_container_device_trigger_safe(dev, transport_add_class_device, transport_remove_classdev); } EXPORT_SYMBOL_GPL(transport_add_device); static int transport_configure(struct attribute_container *cont, struct device *dev, struct device *cdev) { struct transport_class *tclass = class_to_transport_class(cont->class); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->configure) tclass->configure(tcont, dev, cdev); return 0; } /** * transport_configure_device - configure an already set up device * * @dev: generic device representing device to be configured * * The idea of configure is simply to provide a point within the setup * process to allow the transport class to extract information from a * device after it has been setup. This is used in SCSI because we * have to have a setup device to begin using the HBA, but after we * send the initial inquiry, we use configure to extract the device * parameters. The device need not have been added to be configured. */ void transport_configure_device(struct device *dev) { attribute_container_device_trigger(dev, transport_configure); } EXPORT_SYMBOL_GPL(transport_configure_device); static int transport_remove_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_container *tcont = attribute_container_to_transport_container(cont); struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove) tclass->remove(tcont, dev, classdev); if (tclass->remove != anon_transport_dummy_function) { if (tcont->statistics) sysfs_remove_group(&classdev->kobj, tcont->statistics); attribute_container_class_device_del(classdev); } return 0; } /** * transport_remove_device - remove the visibility of a device * * @dev: generic device to remove * * This call removes the visibility of the device (to the user from * sysfs), but does not destroy it. To eliminate a device entirely * you must also call transport_destroy_device. If you don't need to * do remove and destroy as separate operations, use * transport_unregister_device() (see transport_class.h) which will * perform both calls for you. */ void transport_remove_device(struct device *dev) { attribute_container_device_trigger(dev, transport_remove_classdev); } EXPORT_SYMBOL_GPL(transport_remove_device); static void transport_destroy_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove != anon_transport_dummy_function) put_device(classdev); } /** * transport_destroy_device - destroy a removed device * * @dev: device to eliminate from the transport class. * * This call triggers the elimination of storage associated with the * transport classdev. Note: all it really does is relinquish a * reference to the classdev. The memory will not be freed until the * last reference goes to zero. Note also that the classdev retains a * reference count on dev, so dev too will remain for as long as the * transport class device remains around. */ void transport_destroy_device(struct device *dev) { attribute_container_remove_device(dev, transport_destroy_classdev); } EXPORT_SYMBOL_GPL(transport_destroy_device);
11 11 11 11 3 11 10 11 5 6 11 11 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> */ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" #include "page_actor.h" /* Read separately compressed datablock directly into page cache */ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, int expected) { struct folio *folio = page_folio(target_page); struct inode *inode = target_page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; loff_t start_index = folio->index & ~mask; loff_t end_index = start_index | mask; int i, n, pages, bytes, res = -ENOMEM; struct page **page, *last_page; struct squashfs_page_actor *actor; void *pageaddr; if (end_index > file_end) end_index = file_end; pages = end_index - start_index + 1; page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); if (page == NULL) return res; /* Try to grab all the pages covered by the Squashfs block */ for (i = 0, n = start_index; n <= end_index; n++) { page[i] = (n == folio->index) ? target_page : grab_cache_page_nowait(target_page->mapping, n); if (page[i] == NULL) continue; if (PageUptodate(page[i])) { unlock_page(page[i]); put_page(page[i]); continue; } i++; } pages = i; /* * Create a "page actor" which will kmap and kunmap the * page cache pages appropriately within the decompressor */ actor = squashfs_page_actor_init_special(msblk, page, pages, expected, start_index << PAGE_SHIFT); if (actor == NULL) goto out; /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); last_page = squashfs_page_actor_free(actor); if (res < 0) goto mark_errored; if (res != expected || IS_ERR(last_page)) { res = -EIO; goto mark_errored; } /* Last page (if present) may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (end_index == file_end && last_page && bytes) { pageaddr = kmap_local_page(last_page); memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); kunmap_local(pageaddr); } /* Mark pages as uptodate, unlock and release */ for (i = 0; i < pages; i++) { flush_dcache_page(page[i]); SetPageUptodate(page[i]); unlock_page(page[i]); if (page[i] != target_page) put_page(page[i]); } kfree(page); return 0; mark_errored: /* Decompression failed. Target_page is * dealt with by the caller */ for (i = 0; i < pages; i++) { if (page[i] == NULL || page[i] == target_page) continue; flush_dcache_page(page[i]); unlock_page(page[i]); put_page(page[i]); } out: kfree(page); return res; }
90 3 87 7 3 47 83 2 1 2 41 4 1 16 3 3 4 11 3 8 11 2 36 34 30 3 6 3 6 1 18 2 1 16 3 1 8 11 3 8 11 2 14 14 8 2 9 4 11 1 2 22 1 2 17 99 3 1 50 25 20 2 1 1 1 1 1 1 1 1 1 3 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 // SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel CIPSO/IPv4 Support * * This file defines the CIPSO/IPv4 functions for the NetLabel system. The * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 */ #include <linux/types.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/audit.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_cipso_v4.h" #include "netlabel_mgmt.h" #include "netlabel_domainhash.h" /* Argument struct for cipso_v4_doi_walk() */ struct netlbl_cipsov4_doiwalk_arg { struct netlink_callback *nl_cb; struct sk_buff *skb; u32 seq; }; /* Argument struct for netlbl_domhsh_walk() */ struct netlbl_domhsh_walk_arg { struct netlbl_audit *audit_info; u32 doi; }; /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED }, [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED }, [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED }, [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED }, [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED }, }; /* * Helper Functions */ /** * netlbl_cipsov4_add_common - Parse the common sections of a ADD message * @info: the Generic NETLINK info block * @doi_def: the CIPSO V4 DOI definition * * Description: * Parse the common sections of a ADD message and fill in the related values * in @doi_def. Returns zero on success, negative values on failure. * */ static int netlbl_cipsov4_add_common(struct genl_info *info, struct cipso_v4_doi *doi_def) { struct nlattr *nla; int nla_rem; u32 iter = 0; doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_TAGLST], NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) if (nla_type(nla) == NLBL_CIPSOV4_A_TAG) { if (iter >= CIPSO_V4_TAG_MAXCNT) return -EINVAL; doi_def->tags[iter++] = nla_get_u8(nla); } while (iter < CIPSO_V4_TAG_MAXCNT) doi_def->tags[iter++] = CIPSO_V4_TAG_INVALID; return 0; } /* * NetLabel Command Handlers */ /** * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * * Description: * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD * message and add it to the CIPSO V4 engine. Return zero on success and * non-zero on error. * */ static int netlbl_cipsov4_add_std(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; struct cipso_v4_doi *doi_def = NULL; struct nlattr *nla_a; struct nlattr *nla_b; int nla_a_rem; int nla_b_rem; u32 iter; if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] || !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) return -EINVAL; if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); if (doi_def == NULL) return -ENOMEM; doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); if (doi_def->map.std == NULL) { kfree(doi_def); return -ENOMEM; } doi_def->type = CIPSO_V4_MAP_TRANS; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) goto add_std_failure; ret_val = -EINVAL; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { if (nla_validate_nested_deprecated(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy, NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_LVLS) goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->lvl.local_size) doi_def->map.std->lvl.local_size = nla_get_u32(nla_b) + 1; break; case NLBL_CIPSOV4_A_MLSLVLREM: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_REM_LVLS) goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->lvl.cipso_size) doi_def->map.std->lvl.cipso_size = nla_get_u32(nla_b) + 1; break; } } doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32), GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->lvl.local == NULL) { ret_val = -ENOMEM; goto add_std_failure; } doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, sizeof(u32), GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->lvl.cipso == NULL) { ret_val = -ENOMEM; goto add_std_failure; } for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { struct nlattr *lvl_loc; struct nlattr *lvl_rem; lvl_loc = nla_find_nested(nla_a, NLBL_CIPSOV4_A_MLSLVLLOC); lvl_rem = nla_find_nested(nla_a, NLBL_CIPSOV4_A_MLSLVLREM); if (lvl_loc == NULL || lvl_rem == NULL) goto add_std_failure; doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] = nla_get_u32(lvl_rem); doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] = nla_get_u32(lvl_loc); } if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy, NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested_deprecated(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy, NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSCATLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_CATS) goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->cat.local_size) doi_def->map.std->cat.local_size = nla_get_u32(nla_b) + 1; break; case NLBL_CIPSOV4_A_MLSCATREM: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_REM_CATS) goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->cat.cipso_size) doi_def->map.std->cat.cipso_size = nla_get_u32(nla_b) + 1; break; } } doi_def->map.std->cat.local = kcalloc( doi_def->map.std->cat.local_size, sizeof(u32), GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->cat.local == NULL) { ret_val = -ENOMEM; goto add_std_failure; } doi_def->map.std->cat.cipso = kcalloc( doi_def->map.std->cat.cipso_size, sizeof(u32), GFP_KERNEL | __GFP_NOWARN); if (doi_def->map.std->cat.cipso == NULL) { ret_val = -ENOMEM; goto add_std_failure; } for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { struct nlattr *cat_loc; struct nlattr *cat_rem; cat_loc = nla_find_nested(nla_a, NLBL_CIPSOV4_A_MLSCATLOC); cat_rem = nla_find_nested(nla_a, NLBL_CIPSOV4_A_MLSCATREM); if (cat_loc == NULL || cat_rem == NULL) goto add_std_failure; doi_def->map.std->cat.local[ nla_get_u32(cat_loc)] = nla_get_u32(cat_rem); doi_def->map.std->cat.cipso[ nla_get_u32(cat_rem)] = nla_get_u32(cat_loc); } } ret_val = cipso_v4_doi_add(doi_def, audit_info); if (ret_val != 0) goto add_std_failure; return 0; add_std_failure: cipso_v4_doi_free(doi_def); return ret_val; } /** * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * * Description: * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message * and add it to the CIPSO V4 engine. Return zero on success and non-zero on * error. * */ static int netlbl_cipsov4_add_pass(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val; struct cipso_v4_doi *doi_def = NULL; if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); if (doi_def == NULL) return -ENOMEM; doi_def->type = CIPSO_V4_MAP_PASS; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) goto add_pass_failure; ret_val = cipso_v4_doi_add(doi_def, audit_info); if (ret_val != 0) goto add_pass_failure; return 0; add_pass_failure: cipso_v4_doi_free(doi_def); return ret_val; } /** * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * * Description: * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD * message and add it to the CIPSO V4 engine. Return zero on success and * non-zero on error. * */ static int netlbl_cipsov4_add_local(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val; struct cipso_v4_doi *doi_def = NULL; if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); if (doi_def == NULL) return -ENOMEM; doi_def->type = CIPSO_V4_MAP_LOCAL; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) goto add_local_failure; ret_val = cipso_v4_doi_add(doi_def, audit_info); if (ret_val != 0) goto add_local_failure; return 0; add_local_failure: cipso_v4_doi_free(doi_def); return ret_val; } /** * netlbl_cipsov4_add - Handle an ADD message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Create a new DOI definition based on the given ADD message and add it to the * CIPSO V4 engine. Returns zero on success, negative values on failure. * */ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; struct netlbl_audit audit_info; if (!info->attrs[NLBL_CIPSOV4_A_DOI] || !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) { case CIPSO_V4_MAP_TRANS: ret_val = netlbl_cipsov4_add_std(info, &audit_info); break; case CIPSO_V4_MAP_PASS: ret_val = netlbl_cipsov4_add_pass(info, &audit_info); break; case CIPSO_V4_MAP_LOCAL: ret_val = netlbl_cipsov4_add_local(info, &audit_info); break; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); return ret_val; } /** * netlbl_cipsov4_list - Handle a LIST message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated LIST message and respond accordingly. While the * response message generated by the kernel is straightforward, determining * before hand the size of the buffer to allocate is not (we have to generate * the message to know the size). In order to keep this function sane what we * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in * that size, if we fail then we restart with a larger buffer and try again. * We continue in this manner until we hit a limit of failed attempts then we * give up and just send an error message. Returns zero on success and * negative values on error. * */ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) { int ret_val; struct sk_buff *ans_skb = NULL; u32 nlsze_mult = 1; void *data; u32 doi; struct nlattr *nla_a; struct nlattr *nla_b; struct cipso_v4_doi *doi_def; u32 iter; if (!info->attrs[NLBL_CIPSOV4_A_DOI]) { ret_val = -EINVAL; goto list_failure; } list_start: ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE * nlsze_mult, GFP_KERNEL); if (ans_skb == NULL) { ret_val = -ENOMEM; goto list_failure; } data = genlmsg_put_reply(ans_skb, info, &netlbl_cipsov4_gnl_family, 0, NLBL_CIPSOV4_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; } doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); rcu_read_lock(); doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { ret_val = -EINVAL; goto list_failure_lock; } ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); if (ret_val != 0) goto list_failure_lock; nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_TAGLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; } for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID; iter++) { ret_val = nla_put_u8(ans_skb, NLBL_CIPSOV4_A_TAG, doi_def->tags[iter]); if (ret_val != 0) goto list_failure_lock; } nla_nest_end(ans_skb, nla_a); switch (doi_def->type) { case CIPSO_V4_MAP_TRANS: nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; } for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) { if (doi_def->map.std->lvl.local[iter] == CIPSO_V4_INV_LVL) continue; nla_b = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_MLSLVL); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; } ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MLSLVLLOC, iter); if (ret_val != 0) goto list_retry; ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MLSLVLREM, doi_def->map.std->lvl.local[iter]); if (ret_val != 0) goto list_retry; nla_nest_end(ans_skb, nla_b); } nla_nest_end(ans_skb, nla_a); nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_retry; } for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) { if (doi_def->map.std->cat.local[iter] == CIPSO_V4_INV_CAT) continue; nla_b = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_MLSCAT); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; } ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MLSCATLOC, iter); if (ret_val != 0) goto list_retry; ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MLSCATREM, doi_def->map.std->cat.local[iter]); if (ret_val != 0) goto list_retry; nla_nest_end(ans_skb, nla_b); } nla_nest_end(ans_skb, nla_a); break; } cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); kfree_skb(ans_skb); nlsze_mult *= 2; goto list_start; } list_failure_lock: cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); list_failure: kfree_skb(ans_skb); return ret_val; } /** * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL * @doi_def: the CIPSOv4 DOI definition * @arg: the netlbl_cipsov4_doiwalk_arg structure * * Description: * This function is designed to be used as a callback to the * cipso_v4_doi_walk() function for use in generating a response for a LISTALL * message. Returns the size of the message on success, negative values on * failure. * */ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) { int ret_val = -ENOMEM; struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) goto listall_cb_failure; ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi); if (ret_val != 0) goto listall_cb_failure; ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); if (ret_val != 0) goto listall_cb_failure; genlmsg_end(cb_arg->skb, data); return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_cipsov4_listall - Handle a LISTALL message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated LISTALL message and respond accordingly. Returns * zero on success and negative values on error. * */ static int netlbl_cipsov4_listall(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_cipsov4_doiwalk_arg cb_arg; u32 doi_skip = cb->args[0]; cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg); cb->args[0] = doi_skip; return skb->len; } /** * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE * @entry: LSM domain mapping entry * @arg: the netlbl_domhsh_walk_arg structure * * Description: * This function is intended for use by netlbl_cipsov4_remove() as the callback * for the netlbl_domhsh_walk() function; it removes LSM domain map entries * which are associated with the CIPSO DOI specified in @arg. Returns zero on * success, negative values on failure. * */ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) { struct netlbl_domhsh_walk_arg *cb_arg = arg; if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 && entry->def.cipso->doi == cb_arg->doi) return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); return 0; } /** * netlbl_cipsov4_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated REMOVE message and respond accordingly. Returns * zero on success, negative values on failure. * */ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; struct netlbl_domhsh_walk_arg cb_arg; struct netlbl_audit audit_info; u32 skip_bkt = 0; u32 skip_chain = 0; if (!info->attrs[NLBL_CIPSOV4_A_DOI]) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); cb_arg.doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); cb_arg.audit_info = &audit_info; ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, netlbl_cipsov4_remove_cb, &cb_arg); if (ret_val == 0 || ret_val == -ENOENT) { ret_val = cipso_v4_doi_remove(cb_arg.doi, &audit_info); if (ret_val == 0) atomic_dec(&netlabel_mgmt_protocount); } return ret_val; } /* * NetLabel Generic NETLINK Command Definitions */ static const struct genl_small_ops netlbl_cipsov4_ops[] = { { .cmd = NLBL_CIPSOV4_C_ADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_cipsov4_add, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_REMOVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_cipsov4_list, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LISTALL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_cipsov4_listall, }, }; static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CIPSOV4_A_MAX, .policy = netlbl_cipsov4_genl_policy, .module = THIS_MODULE, .small_ops = netlbl_cipsov4_ops, .n_small_ops = ARRAY_SIZE(netlbl_cipsov4_ops), .resv_start_op = NLBL_CIPSOV4_C_LISTALL + 1, }; /* * NetLabel Generic NETLINK Protocol Functions */ /** * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component * * Description: * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK * mechanism. Returns zero on success, negative values on failure. * */ int __init netlbl_cipsov4_genl_init(void) { return genl_register_family(&netlbl_cipsov4_gnl_family); }
72 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/pkt_sched.h> #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> #include <net/caif/caif_dev.h> #define SRVL_CTRL_PKT_SIZE 1 #define SRVL_FLOW_OFF 0x81 #define SRVL_FLOW_ON 0x80 #define SRVL_SET_PIN 0x82 #define container_obj(layr) container_of(layr, struct cfsrvl, layer) static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfsrvl *service = container_obj(layr); if (layr->up == NULL || layr->up->ctrlcmd == NULL) return; switch (ctrl) { case CAIF_CTRLCMD_INIT_RSP: service->open = true; layr->up->ctrlcmd(layr->up, ctrl, phyid); break; case CAIF_CTRLCMD_DEINIT_RSP: case CAIF_CTRLCMD_INIT_FAIL_RSP: service->open = false; layr->up->ctrlcmd(layr->up, ctrl, phyid); break; case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: if (phyid != service->dev_info.id) break; if (service->modem_flow_on) layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_FLOW_OFF_IND, phyid); service->phy_flow_on = false; break; case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND: if (phyid != service->dev_info.id) return; if (service->modem_flow_on) { layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_FLOW_ON_IND, phyid); } service->phy_flow_on = true; break; case CAIF_CTRLCMD_FLOW_OFF_IND: if (service->phy_flow_on) { layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_FLOW_OFF_IND, phyid); } service->modem_flow_on = false; break; case CAIF_CTRLCMD_FLOW_ON_IND: if (service->phy_flow_on) { layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_FLOW_ON_IND, phyid); } service->modem_flow_on = true; break; case _CAIF_CTRLCMD_PHYIF_DOWN_IND: /* In case interface is down, let's fake a remove shutdown */ layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: layr->up->ctrlcmd(layr->up, ctrl, phyid); break; default: pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl); /* We have both modem and phy flow on, send flow on */ layr->up->ctrlcmd(layr->up, ctrl, phyid); service->phy_flow_on = true; break; } } static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) { struct cfsrvl *service = container_obj(layr); caif_assert(layr != NULL); caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); if (!service->supports_flowctrl) return 0; switch (ctrl) { case CAIF_MODEMCMD_FLOW_ON_REQ: { struct cfpkt *pkt; struct caif_payload_info *info; u8 flow_on = SRVL_FLOW_ON; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); if (!pkt) return -ENOMEM; if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { pr_err("Packet is erroneous!\n"); cfpkt_destroy(pkt); return -EPROTO; } info = cfpkt_info(pkt); info->channel_id = service->layer.id; info->hdr_len = 1; info->dev_info = &service->dev_info; cfpkt_set_prio(pkt, TC_PRIO_CONTROL); return layr->dn->transmit(layr->dn, pkt); } case CAIF_MODEMCMD_FLOW_OFF_REQ: { struct cfpkt *pkt; struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); if (!pkt) return -ENOMEM; if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("Packet is erroneous!\n"); cfpkt_destroy(pkt); return -EPROTO; } info = cfpkt_info(pkt); info->channel_id = service->layer.id; info->hdr_len = 1; info->dev_info = &service->dev_info; cfpkt_set_prio(pkt, TC_PRIO_CONTROL); return layr->dn->transmit(layr->dn, pkt); } default: break; } return -EINVAL; } static void cfsrvl_release(struct cflayer *layer) { struct cfsrvl *service = container_of(layer, struct cfsrvl, layer); kfree(service); } void cfsrvl_init(struct cfsrvl *service, u8 channel_id, struct dev_info *dev_info, bool supports_flowctrl) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; service->modem_flow_on = true; service->phy_flow_on = true; service->layer.id = channel_id; service->layer.ctrlcmd = cfservl_ctrlcmd; service->layer.modemcmd = cfservl_modemcmd; service->dev_info = *dev_info; service->supports_flowctrl = supports_flowctrl; service->release = cfsrvl_release; } bool cfsrvl_ready(struct cfsrvl *service, int *err) { if (!service->open) { *err = -ENOTCONN; return false; } return true; } u8 cfsrvl_getphyid(struct cflayer *layer) { struct cfsrvl *servl = container_obj(layer); return servl->dev_info.id; } bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) { struct cfsrvl *servl = container_obj(layer); return servl->dev_info.id == phyid; } void caif_free_client(struct cflayer *adap_layer) { struct cfsrvl *servl; if (adap_layer == NULL || adap_layer->dn == NULL) return; servl = container_obj(adap_layer->dn); servl->release(&servl->layer); } EXPORT_SYMBOL(caif_free_client); void caif_client_register_refcnt(struct cflayer *adapt_layer, void (*hold)(struct cflayer *lyr), void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL)) return; service = container_of(adapt_layer->dn, struct cfsrvl, layer); service->hold = hold; service->put = put; } EXPORT_SYMBOL(caif_client_register_refcnt);
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 // SPDX-License-Identifier: GPL-2.0-only /* DVB USB compliant linux driver for Nebula Electronics uDigiTV DVB-T USB2.0 * receiver * * Copyright (C) 2005 Patrick Boettcher (patrick.boettcher@posteo.de) * * partly based on the SDK published by Nebula Electronics * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "digitv.h" #include "mt352.h" #include "nxt6000.h" /* debug */ static int dvb_usb_digitv_debug; module_param_named(debug,dvb_usb_digitv_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=rc (or-able))." DVB_USB_DEBUG_STATUS); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); #define deb_rc(args...) dprintk(dvb_usb_digitv_debug,0x01,args) static int digitv_ctrl_msg(struct dvb_usb_device *d, u8 cmd, u8 vv, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { struct digitv_state *st = d->priv; int ret, wo; wo = (rbuf == NULL || rlen == 0); /* write-only */ if (wlen > 4 || rlen > 4) return -EIO; memset(st->sndbuf, 0, 7); memset(st->rcvbuf, 0, 7); st->sndbuf[0] = cmd; st->sndbuf[1] = vv; st->sndbuf[2] = wo ? wlen : rlen; if (wo) { memcpy(&st->sndbuf[3], wbuf, wlen); ret = dvb_usb_generic_write(d, st->sndbuf, 7); } else { ret = dvb_usb_generic_rw(d, st->sndbuf, 7, st->rcvbuf, 7, 10); memcpy(rbuf, &st->rcvbuf[3], rlen); } return ret; } /* I2C */ static int digitv_i2c_xfer(struct i2c_adapter *adap,struct i2c_msg msg[],int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); int i; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; if (num > 2) warn("more than 2 i2c messages at a time is not handled yet. TODO."); for (i = 0; i < num; i++) { if (msg[i].len < 1) { i = -EOPNOTSUPP; break; } /* write/read request */ if (i+1 < num && (msg[i+1].flags & I2C_M_RD)) { if (digitv_ctrl_msg(d, USB_READ_COFDM, msg[i].buf[0], NULL, 0, msg[i+1].buf,msg[i+1].len) < 0) break; i++; } else if (digitv_ctrl_msg(d,USB_WRITE_COFDM, msg[i].buf[0], &msg[i].buf[1],msg[i].len-1,NULL,0) < 0) break; } mutex_unlock(&d->i2c_mutex); return i; } static u32 digitv_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm digitv_i2c_algo = { .master_xfer = digitv_i2c_xfer, .functionality = digitv_i2c_func, }; /* Callbacks for DVB USB */ static int digitv_identify_state(struct usb_device *udev, const struct dvb_usb_device_properties *props, const struct dvb_usb_device_description **desc, int *cold) { *cold = udev->descriptor.iManufacturer == 0 && udev->descriptor.iProduct == 0; return 0; } static int digitv_mt352_demod_init(struct dvb_frontend *fe) { static u8 reset_buf[] = { 0x89, 0x38, 0x8a, 0x2d, 0x50, 0x80 }; static u8 init_buf[] = { 0x68, 0xa0, 0x8e, 0x40, 0x53, 0x50, 0x67, 0x20, 0x7d, 0x01, 0x7c, 0x00, 0x7a, 0x00, 0x79, 0x20, 0x57, 0x05, 0x56, 0x31, 0x88, 0x0f, 0x75, 0x32 }; int i; for (i = 0; i < ARRAY_SIZE(reset_buf); i += 2) mt352_write(fe, &reset_buf[i], 2); msleep(1); for (i = 0; i < ARRAY_SIZE(init_buf); i += 2) mt352_write(fe, &init_buf[i], 2); return 0; } static struct mt352_config digitv_mt352_config = { .demod_init = digitv_mt352_demod_init, }; static int digitv_nxt6000_tuner_set_params(struct dvb_frontend *fe) { struct dvb_usb_adapter *adap = fe->dvb->priv; u8 b[5]; fe->ops.tuner_ops.calc_regs(fe, b, sizeof(b)); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 1); return digitv_ctrl_msg(adap->dev, USB_WRITE_TUNER, 0, &b[1], 4, NULL, 0); } static struct nxt6000_config digitv_nxt6000_config = { .clock_inversion = 1, }; static int digitv_frontend_attach(struct dvb_usb_adapter *adap) { struct digitv_state *st = adap->dev->priv; adap->fe_adap[0].fe = dvb_attach(mt352_attach, &digitv_mt352_config, &adap->dev->i2c_adap); if ((adap->fe_adap[0].fe) != NULL) { st->is_nxt6000 = 0; return 0; } adap->fe_adap[0].fe = dvb_attach(nxt6000_attach, &digitv_nxt6000_config, &adap->dev->i2c_adap); if ((adap->fe_adap[0].fe) != NULL) { st->is_nxt6000 = 1; return 0; } return -EIO; } static int digitv_tuner_attach(struct dvb_usb_adapter *adap) { struct digitv_state *st = adap->dev->priv; if (!dvb_attach(dvb_pll_attach, adap->fe_adap[0].fe, 0x60, NULL, DVB_PLL_TDED4)) return -ENODEV; if (st->is_nxt6000) adap->fe_adap[0].fe->ops.tuner_ops.set_params = digitv_nxt6000_tuner_set_params; return 0; } static struct rc_map_table rc_map_digitv_table[] = { { 0x5f55, KEY_0 }, { 0x6f55, KEY_1 }, { 0x9f55, KEY_2 }, { 0xaf55, KEY_3 }, { 0x5f56, KEY_4 }, { 0x6f56, KEY_5 }, { 0x9f56, KEY_6 }, { 0xaf56, KEY_7 }, { 0x5f59, KEY_8 }, { 0x6f59, KEY_9 }, { 0x9f59, KEY_TV }, { 0xaf59, KEY_AUX }, { 0x5f5a, KEY_DVD }, { 0x6f5a, KEY_POWER }, { 0x9f5a, KEY_CAMERA }, /* labelled 'Picture' */ { 0xaf5a, KEY_AUDIO }, { 0x5f65, KEY_INFO }, { 0x6f65, KEY_F13 }, /* 16:9 */ { 0x9f65, KEY_F14 }, /* 14:9 */ { 0xaf65, KEY_EPG }, { 0x5f66, KEY_EXIT }, { 0x6f66, KEY_MENU }, { 0x9f66, KEY_UP }, { 0xaf66, KEY_DOWN }, { 0x5f69, KEY_LEFT }, { 0x6f69, KEY_RIGHT }, { 0x9f69, KEY_ENTER }, { 0xaf69, KEY_CHANNELUP }, { 0x5f6a, KEY_CHANNELDOWN }, { 0x6f6a, KEY_VOLUMEUP }, { 0x9f6a, KEY_VOLUMEDOWN }, { 0xaf6a, KEY_RED }, { 0x5f95, KEY_GREEN }, { 0x6f95, KEY_YELLOW }, { 0x9f95, KEY_BLUE }, { 0xaf95, KEY_SUBTITLE }, { 0x5f96, KEY_F15 }, /* AD */ { 0x6f96, KEY_TEXT }, { 0x9f96, KEY_MUTE }, { 0xaf96, KEY_REWIND }, { 0x5f99, KEY_STOP }, { 0x6f99, KEY_PLAY }, { 0x9f99, KEY_FASTFORWARD }, { 0xaf99, KEY_F16 }, /* chapter */ { 0x5f9a, KEY_PAUSE }, { 0x6f9a, KEY_PLAY }, { 0x9f9a, KEY_RECORD }, { 0xaf9a, KEY_F17 }, /* picture in picture */ { 0x5fa5, KEY_KPPLUS }, /* zoom in */ { 0x6fa5, KEY_KPMINUS }, /* zoom out */ { 0x9fa5, KEY_F18 }, /* capture */ { 0xafa5, KEY_F19 }, /* web */ { 0x5fa6, KEY_EMAIL }, { 0x6fa6, KEY_PHONE }, { 0x9fa6, KEY_PC }, }; static int digitv_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { struct rc_map_table *entry; int ret, i; u8 key[4]; u8 b[4] = { 0 }; *event = 0; *state = REMOTE_NO_KEY_PRESSED; ret = digitv_ctrl_msg(d, USB_READ_REMOTE, 0, NULL, 0, key, 4); if (ret) return ret; /* Tell the device we've read the remote. Not sure how necessary this is, but the Nebula SDK does it. */ ret = digitv_ctrl_msg(d, USB_WRITE_REMOTE, 0, b, 4, NULL, 0); if (ret) return ret; /* if something is inside the buffer, simulate key press */ if (key[0] != 0) { for (i = 0; i < d->props.rc.legacy.rc_map_size; i++) { entry = &d->props.rc.legacy.rc_map_table[i]; if (rc5_custom(entry) == key[0] && rc5_data(entry) == key[1]) { *event = entry->keycode; *state = REMOTE_KEY_PRESSED; return 0; } } deb_rc("key: %*ph\n", 4, key); } return 0; } /* DVB USB Driver stuff */ static struct dvb_usb_device_properties digitv_properties; static int digitv_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct dvb_usb_device *d; int ret = dvb_usb_device_init(intf, &digitv_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { u8 b[4] = { 0 }; if (d != NULL) { /* do that only when the firmware is loaded */ b[0] = 1; digitv_ctrl_msg(d,USB_WRITE_REMOTE_TYPE,0,b,4,NULL,0); b[0] = 0; digitv_ctrl_msg(d,USB_WRITE_REMOTE,0,b,4,NULL,0); } } return ret; } enum { ANCHOR_NEBULA_DIGITV, }; static struct usb_device_id digitv_table[] = { DVB_USB_DEV(ANCHOR, ANCHOR_NEBULA_DIGITV), { } }; MODULE_DEVICE_TABLE (usb, digitv_table); static struct dvb_usb_device_properties digitv_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-digitv-02.fw", .size_of_priv = sizeof(struct digitv_state), .num_adapters = 1, .adapter = { { .num_frontends = 1, .fe = {{ .frontend_attach = digitv_frontend_attach, .tuner_attach = digitv_tuner_attach, /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_BULK, .count = 7, .endpoint = 0x02, .u = { .bulk = { .buffersize = 4096, } } }, }}, } }, .identify_state = digitv_identify_state, .rc.legacy = { .rc_interval = 1000, .rc_map_table = rc_map_digitv_table, .rc_map_size = ARRAY_SIZE(rc_map_digitv_table), .rc_query = digitv_rc_query, }, .i2c_algo = &digitv_i2c_algo, .generic_bulk_ctrl_endpoint = 0x01, .num_device_descs = 1, .devices = { { "Nebula Electronics uDigiTV DVB-T USB2.0)", { &digitv_table[ANCHOR_NEBULA_DIGITV], NULL }, { NULL }, }, { NULL }, } }; static struct usb_driver digitv_driver = { .name = "dvb_usb_digitv", .probe = digitv_probe, .disconnect = dvb_usb_device_exit, .id_table = digitv_table, }; module_usb_driver(digitv_driver); MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>"); MODULE_DESCRIPTION("Driver for Nebula Electronics uDigiTV DVB-T USB2.0"); MODULE_VERSION("1.0-alpha"); MODULE_LICENSE("GPL");
1 6 1 3 5 5 4 2 72 72 72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 // SPDX-License-Identifier: GPL-2.0 /* * xfrm6_policy.c: based on xfrm4_policy.c * * Authors: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support * YOSHIFUJI Hideaki * Split up af-specific portion * */ #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/l3mdev.h> static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, u32 mark) { struct flowi6 fl6; struct dst_entry *dst; int err; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl6.flowi6_mark = mark; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); dst = ip6_route_output(net, NULL, &fl6); err = dst->error; if (dst->error) { dst_release(dst); dst = ERR_PTR(err); } return dst; } static int xfrm6_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark) { struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; idev = ip6_dst_idev(dst); if (!idev) { dst_release(dst); return -EHOSTUNREACH; } dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rt6_info *rt = dst_rt6_info(xdst->route); xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) { netdev_put(dev, &xdst->u.dst.dev_tracker); return -ENODEV; } /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; rt6_uncached_list_add(&xdst->u.rt6); return 0; } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); rt6_uncached_list_del(&xdst->u.rt6); if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { struct xfrm_dst *xdst; xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { struct inet6_dev *loopback_idev = in6_dev_get(dev_net(dev)->loopback_dev); do { in6_dev_put(xdst->u.rt6.rt6i_idev); xdst->u.rt6.rt6i_idev = loopback_idev; in6_dev_hold(loopback_idev); xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst); } while (xdst->u.dst.xfrm); __in6_dev_put(loopback_idev); } xfrm_dst_ifdown(dst, dev); } static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, }; static int __init xfrm6_policy_init(void) { return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) { xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } #ifdef CONFIG_SYSCTL static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = xfrm6_policy_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); if (!table) goto err_alloc; table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; } hdr = register_net_sysctl_sz(net, "net/ipv6", table, ARRAY_SIZE(xfrm6_policy_table)); if (!hdr) goto err_reg; net->ipv6.sysctl.xfrm6_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { const struct ctl_table *table; if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else /* CONFIG_SYSCTL */ static inline int xfrm6_net_sysctl_init(struct net *net) { return 0; } static inline void xfrm6_net_sysctl_exit(struct net *net) { } #endif static int __net_init xfrm6_net_init(struct net *net) { int ret; memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, sizeof(xfrm6_dst_ops_template)); ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); if (ret) return ret; ret = xfrm6_net_sysctl_init(net); if (ret) dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); return ret; } static void __net_exit xfrm6_net_exit(struct net *net) { xfrm6_net_sysctl_exit(net); dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); } static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; int __init xfrm6_init(void) { int ret; ret = xfrm6_policy_init(); if (ret) goto out; ret = xfrm6_state_init(); if (ret) goto out_policy; ret = xfrm6_protocol_init(); if (ret) goto out_state; ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; ret = xfrm_nat_keepalive_init(AF_INET6); if (ret) goto out_nat_keepalive; out: return ret; out_nat_keepalive: unregister_pernet_subsys(&xfrm6_net_ops); out_protocol: xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; } void xfrm6_fini(void) { xfrm_nat_keepalive_fini(AF_INET6); unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); }
4 2 3 3 37 1 37 2 2 2 2 1 2 2 2 2 27 7 34 34 6 34 41 41 41 39 39 38 1 38 37 2 2 37 28 10 8 33 3 27 7 34 36 31 4 3 6 1 4 4 4 30 4 26 3 1 45 2 1 3 4 4 1 4 2 1 2 3 8 4 77 77 4 7 2 5 42 2 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 // SPDX-License-Identifier: GPL-2.0-or-later /* AF_RXRPC sendmsg() implementation. * * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/net.h> #include <linux/gfp.h> #include <linux/skbuff.h> #include <linux/export.h> #include <linux/sched/signal.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" /* * Propose an abort to be made in the I/O thread. */ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, enum rxrpc_abort_reason why) { _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); if (!call->send_abort && !rxrpc_call_is_complete(call)) { call->send_abort_why = why; call->send_abort_err = error; call->send_abort_seq = 0; /* Request abort locklessly vs rxrpc_input_call_event(). */ smp_store_release(&call->send_abort, abort_code); rxrpc_poke_call(call, rxrpc_call_poke_abort); return true; } return false; } /* * Wait for a call to become connected. Interruption here doesn't cause the * call to be aborted. */ static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) { DECLARE_WAITQUEUE(myself, current); int ret = 0; _enter("%d", call->debug_id); if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) goto no_wait; add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { switch (call->interruptibility) { case RXRPC_INTERRUPTIBLE: case RXRPC_PREINTERRUPTIBLE: set_current_state(TASK_INTERRUPTIBLE); break; case RXRPC_UNINTERRUPTIBLE: default: set_current_state(TASK_UNINTERRUPTIBLE); break; } if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) break; if ((call->interruptibility == RXRPC_INTERRUPTIBLE || call->interruptibility == RXRPC_PREINTERRUPTIBLE) && signal_pending(current)) { ret = sock_intr_errno(*timeo); break; } *timeo = schedule_timeout(*timeo); } remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); no_wait: if (ret == 0 && rxrpc_call_is_complete(call)) ret = call->error; _leave(" = %d", ret); return ret; } /* * Return true if there's sufficient Tx queue space. */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) { if (_tx_win) *_tx_win = call->tx_bottom; return call->tx_prepared - call->tx_bottom < 256; } /* * Wait for space to appear in the Tx queue or a signal to occur. */ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo) { for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (rxrpc_check_tx_space(call, NULL)) return 0; if (rxrpc_call_is_complete(call)) return call->error; if (signal_pending(current)) return sock_intr_errno(*timeo); trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } /* * Wait for space to appear in the Tx queue uninterruptibly, but with * a timeout of 2*RTT if no progress was made and a signal occurred. */ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; signed long rtt, timeout; rtt = READ_ONCE(call->peer->srtt_us) >> 3; rtt = usecs_to_jiffies(rtt) * 2; if (rtt < 2) rtt = 2; timeout = rtt; tx_start = smp_load_acquire(&call->acks_hard_ack); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (rxrpc_check_tx_space(call, &tx_win)) return 0; if (rxrpc_call_is_complete(call)) return call->error; if (timeout == 0 && tx_win == tx_start && signal_pending(current)) return -EINTR; if (tx_win != tx_start) { timeout = rtt; tx_start = tx_win; } trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); timeout = schedule_timeout(timeout); } } /* * Wait for space to appear in the Tx queue uninterruptibly. */ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo) { for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (rxrpc_check_tx_space(call, NULL)) return 0; if (rxrpc_call_is_complete(call)) return call->error; trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked */ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo, bool waitall) { DECLARE_WAITQUEUE(myself, current); int ret; _enter(",{%u,%u,%u,%u}", call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); switch (call->interruptibility) { case RXRPC_INTERRUPTIBLE: if (waitall) ret = rxrpc_wait_for_tx_window_waitall(rx, call); else ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); break; case RXRPC_PREINTERRUPTIBLE: case RXRPC_UNINTERRUPTIBLE: default: ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo); break; } remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); _leave(" = %d", ret); return ret; } /* * Notify the owner of the call that the transmit phase is ended and the last * packet has been queued. */ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { if (notify_end_tx) notify_end_tx(&rx->sk, call, call->user_call_ID); } /* * Queue a DATA packet for transmission, set the resend timeout and send * the packet immediately. Returns the error from rxrpc_send_data_packet() * in case the caller wants to do something with it. */ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { rxrpc_seq_t seq = txb->seq; bool poke, last = txb->flags & RXRPC_LAST_PACKET; rxrpc_inc_stat(call->rxnet, stat_tx_data); ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. */ txb->last_sent = ktime_get_real(); if (last) trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); /* Add the packet to the call's output buffer */ spin_lock(&call->tx_lock); poke = list_empty(&call->tx_sendmsg); list_add_tail(&txb->call_link, &call->tx_sendmsg); call->tx_prepared = seq; if (last) rxrpc_notify_end_tx(rx, call, notify_end_tx); spin_unlock(&call->tx_lock); if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } /* * send data through a socket * - must be called in process context * - The caller holds the call user access mutex, but not the socket lock. */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx, bool *_dropped_lock) { struct rxrpc_txbuf *txb; struct sock *sk = &rx->sk; enum rxrpc_call_state state; long timeo; bool more = msg->msg_flags & MSG_MORE; int ret, copied = 0; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ret = rxrpc_wait_to_be_connected(call, &timeo); if (ret < 0) return ret; if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) { ret = rxrpc_init_client_conn_security(call->conn); if (ret < 0) return ret; } /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); reload: ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) goto maybe_error; state = rxrpc_call_state(call); ret = -ESHUTDOWN; if (state >= RXRPC_CALL_COMPLETE) goto maybe_error; ret = -EPROTO; if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && state != RXRPC_CALL_SERVER_ACK_REQUEST && state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Request phase complete for this client call */ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, call->cid, call->call_id, call->rx_consumed, 0, -EPROTO); goto maybe_error; } ret = -EMSGSIZE; if (call->tx_total_len != -1) { if (len - copied > call->tx_total_len) goto maybe_error; if (!more && len - copied != call->tx_total_len) goto maybe_error; } txb = call->tx_pending; call->tx_pending = NULL; if (txb) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more); do { if (!txb) { size_t remain; _debug("alloc"); if (!rxrpc_check_tx_space(call, NULL)) goto wait_for_space; /* Work out the maximum size of a packet. Assume that * the security header is going to be in the padded * region (enc blocksize), but the trailer is not. */ remain = more ? INT_MAX : msg_data_left(msg); txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation); if (!txb) { ret = -ENOMEM; goto maybe_error; } } _debug("append"); /* append next segment of data to the current buffer */ if (msg_data_left(msg) > 0) { size_t copy = min_t(size_t, txb->space, msg_data_left(msg)); _debug("add %zu", copy); if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, copy, &msg->msg_iter)) goto efault; _debug("added"); txb->space -= copy; txb->len += copy; txb->offset += copy; copied += copy; if (call->tx_total_len != -1) call->tx_total_len -= copy; } /* check for the far side aborting the call or a network error * occurring */ if (rxrpc_call_is_complete(call)) goto call_terminated; /* add the packet to the send queue if it's now full */ if (!txb->space || (msg_data_left(msg) == 0 && !more)) { if (msg_data_left(msg) == 0 && !more) txb->flags |= RXRPC_LAST_PACKET; else if (call->tx_top - call->acks_hard_ack < call->tx_winsize) txb->flags |= RXRPC_MORE_PACKETS; ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; txb->kvec[0].iov_len += txb->len; txb->len = txb->kvec[0].iov_len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } } while (msg_data_left(msg) > 0); success: ret = copied; if (rxrpc_call_is_complete(call) && call->error < 0) ret = call->error; out: call->tx_pending = txb; _leave(" = %d", ret); return ret; call_terminated: rxrpc_put_txbuf(txb, rxrpc_txbuf_put_send_aborted); _leave(" = %d", call->error); return call->error; maybe_error: if (copied) goto success; goto out; efault: ret = -EFAULT; goto out; wait_for_space: ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; mutex_unlock(&call->user_mutex); *_dropped_lock = true; ret = rxrpc_wait_for_tx_window(rx, call, &timeo, msg->msg_flags & MSG_WAITALL); if (ret < 0) goto maybe_error; if (call->interruptibility == RXRPC_INTERRUPTIBLE) { if (mutex_lock_interruptible(&call->user_mutex) < 0) { ret = sock_intr_errno(timeo); goto maybe_error; } } else { mutex_lock(&call->user_mutex); } *_dropped_lock = false; goto reload; } /* * extract control messages from the sendmsg() control buffer */ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) { struct cmsghdr *cmsg; bool got_user_ID = false; int len; if (msg->msg_controllen == 0) return -EINVAL; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; len = cmsg->cmsg_len - sizeof(struct cmsghdr); _debug("CMSG %d, %d, %d", cmsg->cmsg_level, cmsg->cmsg_type, len); if (cmsg->cmsg_level != SOL_RXRPC) continue; switch (cmsg->cmsg_type) { case RXRPC_USER_CALL_ID: if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; p->call.user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } got_user_ID = true; break; case RXRPC_ABORT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; p->command = RXRPC_CMD_SEND_ABORT; if (len != sizeof(p->abort_code)) return -EINVAL; p->abort_code = *(unsigned int *)CMSG_DATA(cmsg); if (p->abort_code == 0) return -EINVAL; break; case RXRPC_CHARGE_ACCEPT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; p->command = RXRPC_CMD_CHARGE_ACCEPT; if (len != 0) return -EINVAL; break; case RXRPC_EXCLUSIVE_CALL: p->exclusive = true; if (len != 0) return -EINVAL; break; case RXRPC_UPGRADE_SERVICE: p->upgrade = true; if (len != 0) return -EINVAL; break; case RXRPC_TX_LENGTH: if (p->call.tx_total_len != -1 || len != sizeof(__s64)) return -EINVAL; p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); if (p->call.tx_total_len < 0) return -EINVAL; break; case RXRPC_SET_CALL_TIMEOUT: if (len & 3 || len < 4 || len > 12) return -EINVAL; memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); p->call.nr_timeouts = len / 4; if (p->call.timeouts.hard > INT_MAX / HZ) return -ERANGE; if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) return -ERANGE; if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) return -ERANGE; break; default: return -EINVAL; } } if (!got_user_ID) return -EINVAL; if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; _leave(" = 0"); return 0; } /* * Create a new client call for sendmsg(). * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, struct rxrpc_send_params *p) __releases(&rx->sk.sk_lock.slock) __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; struct rxrpc_peer *peer; struct rxrpc_call *call; struct key *key; DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); _enter(""); if (!msg->msg_name) { release_sock(&rx->sk); return ERR_PTR(-EDESTADDRREQ); } peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL); if (!peer) { release_sock(&rx->sk); return ERR_PTR(-ENOMEM); } key = rx->key; if (key && !rx->key->payload.data[0]) key = NULL; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.peer = peer; cp.key = rx->key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ rxrpc_put_peer(peer, rxrpc_peer_put_application); _leave(" = %p\n", call); return call; } /* * send a message forming part of a client call through an RxRPC socket * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; bool dropped_lock = false; int ret; struct rxrpc_send_params p = { .call.tx_total_len = -1, .call.user_call_ID = 0, .call.nr_timeouts = 0, .call.interruptibility = RXRPC_INTERRUPTIBLE, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, .upgrade = false, }; _enter(""); ret = rxrpc_sendmsg_cmsg(msg, &p); if (ret < 0) goto error_release_sock; if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); goto error_release_sock; } call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); if (!call) { ret = -EBADSLT; if (p.command != RXRPC_CMD_SEND_DATA) goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p); /* The socket is now unlocked... */ if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ p.call.nr_timeouts = 0; ret = 0; if (rxrpc_call_is_complete(call)) goto out_put_unlock; } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_SECURING: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_SERVER_PREALLOC: rxrpc_put_call(call, rxrpc_call_put_sendmsg); ret = -EBUSY; goto error_release_sock; default: break; } ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); if (ret < 0) { ret = -ERESTARTSYS; goto error_put; } if (p.call.tx_total_len != -1) { ret = -EINVAL; if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) goto out_put_unlock; call->tx_total_len = p.call.tx_total_len; } } switch (p.call.nr_timeouts) { case 3: WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal); fallthrough; case 2: WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle); fallthrough; case 1: if (p.call.timeouts.hard > 0) { ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC); WRITE_ONCE(call->expect_term_by, ktime_add(p.call.timeouts.hard, ktime_get_real())); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); rxrpc_poke_call(call, rxrpc_call_poke_set_timeout); } break; } if (rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, rxrpc_abort_call_sendmsg); ret = 0; } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else { ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); } out_put_unlock: if (!dropped_lock) mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put_sendmsg); _leave(" = %d", ret); return ret; error_release_sock: release_sock(&rx->sk); return ret; } /** * rxrpc_kernel_send_data - Allow a kernel service to send data on a call * @sock: The socket the call is on * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send * @notify_end_tx: Notification that the last packet is queued. * * Allow a kernel service to send data on a call. The call must be in an state * appropriate to sending data. No control data should be supplied in @msg, * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. */ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx) { bool dropped_lock = false; int ret; _enter("{%d},", call->debug_id); ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); mutex_lock(&call->user_mutex); ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, notify_end_tx, &dropped_lock); if (ret == -ESHUTDOWN) ret = call->error; if (!dropped_lock) mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } EXPORT_SYMBOL(rxrpc_kernel_send_data); /** * rxrpc_kernel_abort_call - Allow a kernel service to abort a call * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * @error: Local error value * @why: Indication as to why. * * Allow a kernel service to abort a call, if it's still in an abortable state * and return true if the call was aborted, false if it was already complete. */ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, enum rxrpc_abort_reason why) { bool aborted; _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); aborted = rxrpc_propose_abort(call, abort_code, error, why); mutex_unlock(&call->user_mutex); return aborted; } EXPORT_SYMBOL(rxrpc_kernel_abort_call); /** * rxrpc_kernel_set_tx_length - Set the total Tx length on a call * @sock: The socket the call is on * @call: The call to be informed * @tx_total_len: The amount of data to be transmitted for this call * * Allow a kernel service to set the total transmit length on a call. This * allows buffer-to-packet encrypt-and-copy to be performed. * * This function is primarily for use for setting the reply length since the * request length can be set when beginning the call. */ void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call, s64 tx_total_len) { WARN_ON(call->tx_total_len != -1); call->tx_total_len = tx_total_len; } EXPORT_SYMBOL(rxrpc_kernel_set_tx_length);
2 3 6 8 9 5 1 4 2 2 2 2 2 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 /* * algif_rng: User-space interface for random number generators * * This file provides the user-space API for random number generators. * * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL2 * are required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include <linux/capability.h> #include <linux/module.h> #include <crypto/rng.h> #include <linux/random.h> #include <crypto/if_alg.h> #include <linux/net.h> #include <net/sock.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>"); MODULE_DESCRIPTION("User-space interface for random number generators"); struct rng_ctx { #define MAXSIZE 128 unsigned int len; struct crypto_rng *drng; u8 *addtl; size_t addtl_len; }; struct rng_parent_ctx { struct crypto_rng *drng; u8 *entropy; }; static void rng_reset_addtl(struct rng_ctx *ctx) { kfree_sensitive(ctx->addtl); ctx->addtl = NULL; ctx->addtl_len = 0; } static int _rng_recvmsg(struct crypto_rng *drng, struct msghdr *msg, size_t len, u8 *addtl, size_t addtl_len) { int err = 0; int genlen = 0; u8 result[MAXSIZE]; if (len == 0) return 0; if (len > MAXSIZE) len = MAXSIZE; /* * although not strictly needed, this is a precaution against coding * errors */ memset(result, 0, len); /* * The enforcement of a proper seeding of an RNG is done within an * RNG implementation. Some RNGs (DRBG, krng) do not need specific * seeding as they automatically seed. The X9.31 DRNG will return * an error if it was not seeded properly. */ genlen = crypto_rng_generate(drng, addtl, addtl_len, result, len); if (genlen < 0) return genlen; err = memcpy_to_msg(msg, result, len); memzero_explicit(result, len); return err ? err : len; } static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; return _rng_recvmsg(ctx->drng, msg, len, NULL, 0); } static int rng_test_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; int ret; lock_sock(sock->sk); ret = _rng_recvmsg(ctx->drng, msg, len, ctx->addtl, ctx->addtl_len); rng_reset_addtl(ctx); release_sock(sock->sk); return ret; } static int rng_test_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { int err; struct alg_sock *ask = alg_sk(sock->sk); struct rng_ctx *ctx = ask->private; lock_sock(sock->sk); if (len > MAXSIZE) { err = -EMSGSIZE; goto unlock; } rng_reset_addtl(ctx); ctx->addtl = kmalloc(len, GFP_KERNEL); if (!ctx->addtl) { err = -ENOMEM; goto unlock; } err = memcpy_from_msg(ctx->addtl, msg, len); if (err) { rng_reset_addtl(ctx); goto unlock; } ctx->addtl_len = len; unlock: release_sock(sock->sk); return err ? err : len; } static struct proto_ops algif_rng_ops = { .family = PF_ALG, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, .bind = sock_no_bind, .accept = sock_no_accept, .sendmsg = sock_no_sendmsg, .release = af_alg_release, .recvmsg = rng_recvmsg, }; static struct proto_ops __maybe_unused algif_rng_test_ops = { .family = PF_ALG, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, .bind = sock_no_bind, .accept = sock_no_accept, .release = af_alg_release, .recvmsg = rng_test_recvmsg, .sendmsg = rng_test_sendmsg, }; static void *rng_bind(const char *name, u32 type, u32 mask) { struct rng_parent_ctx *pctx; struct crypto_rng *rng; pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); if (!pctx) return ERR_PTR(-ENOMEM); rng = crypto_alloc_rng(name, type, mask); if (IS_ERR(rng)) { kfree(pctx); return ERR_CAST(rng); } pctx->drng = rng; return pctx; } static void rng_release(void *private) { struct rng_parent_ctx *pctx = private; if (unlikely(!pctx)) return; crypto_free_rng(pctx->drng); kfree_sensitive(pctx->entropy); kfree_sensitive(pctx); } static void rng_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct rng_ctx *ctx = ask->private; rng_reset_addtl(ctx); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); } static int rng_accept_parent(void *private, struct sock *sk) { struct rng_ctx *ctx; struct rng_parent_ctx *pctx = private; struct alg_sock *ask = alg_sk(sk); unsigned int len = sizeof(*ctx); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->len = len; ctx->addtl = NULL; ctx->addtl_len = 0; /* * No seeding done at that point -- if multiple accepts are * done on one RNG instance, each resulting FD points to the same * state of the RNG. */ ctx->drng = pctx->drng; ask->private = ctx; sk->sk_destruct = rng_sock_destruct; /* * Non NULL pctx->entropy means that CAVP test has been initiated on * this socket, replace proto_ops algif_rng_ops with algif_rng_test_ops. */ if (IS_ENABLED(CONFIG_CRYPTO_USER_API_RNG_CAVP) && pctx->entropy) sk->sk_socket->ops = &algif_rng_test_ops; return 0; } static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen) { struct rng_parent_ctx *pctx = private; /* * Check whether seedlen is of sufficient size is done in RNG * implementations. */ return crypto_rng_reset(pctx->drng, seed, seedlen); } static int __maybe_unused rng_setentropy(void *private, sockptr_t entropy, unsigned int len) { struct rng_parent_ctx *pctx = private; u8 *kentropy = NULL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (pctx->entropy) return -EINVAL; if (len > MAXSIZE) return -EMSGSIZE; if (len) { kentropy = memdup_sockptr(entropy, len); if (IS_ERR(kentropy)) return PTR_ERR(kentropy); } crypto_rng_alg(pctx->drng)->set_ent(pctx->drng, kentropy, len); /* * Since rng doesn't perform any memory management for the entropy * buffer, save kentropy pointer to pctx now to free it after use. */ pctx->entropy = kentropy; return 0; } static const struct af_alg_type algif_type_rng = { .bind = rng_bind, .release = rng_release, .accept = rng_accept_parent, .setkey = rng_setkey, #ifdef CONFIG_CRYPTO_USER_API_RNG_CAVP .setentropy = rng_setentropy, #endif .ops = &algif_rng_ops, .name = "rng", .owner = THIS_MODULE }; static int __init rng_init(void) { return af_alg_register_type(&algif_type_rng); } static void __exit rng_exit(void) { int err = af_alg_unregister_type(&algif_type_rng); BUG_ON(err); } module_init(rng_init); module_exit(rng_exit);
279 35 479 2 555 933 931 1118 1103 27 1121 1120 1103 27 960 561 1107 16 30 17 17 582 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 // SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * * Authors: * John McCutchan <ttb@tentacle.dhs.org> * Robert Love <rml@novell.com> * * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/inotify.h> #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/sched/mm.h> #include "inotify.h" /* * Check if 2 events contain the same information. */ static bool event_compare(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { struct inotify_event_info *old, *new; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && (old->wd == new->wd) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; return false; } static int inotify_merge(struct fsnotify_group *group, struct fsnotify_event *event) { struct list_head *list = &group->notification_list; struct fsnotify_event *last_event; last_event = list_entry(list->prev, struct fsnotify_event, list); return event_compare(last_event, event); } int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; if (name) { len = name->len; alloc_len += len + 1; } pr_debug("%s: group=%p mark=%p mask=%x\n", __func__, group, inode_mark, mask); i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); /* * We can be racing with mark being detached. Don't report event with * invalid wd. */ wd = READ_ONCE(i_mark->wd); if (wd == -1) return 0; /* * Whoever is interested in the event, pays for the allocation. Do not * trigger OOM killer in the target monitoring memcg as it may have * security repercussion. */ old_memcg = set_active_memcg(group->memcg); event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); set_active_memcg(old_memcg); if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue * overflow to let userspace know event was lost. */ fsnotify_queue_overflow(group); return -ENOMEM; } /* * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events * for fanotify. inotify never reported IN_ISDIR with those events. * It looks like an oversight, but to avoid the risk of breaking * existing inotify programs, mask the flag out from those events. */ if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) mask &= ~IN_ISDIR; fsn_event = &event->fse; fsnotify_init_event(fsn_event); event->mask = mask; event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) strcpy(event->name, name->name); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); } if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; } static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); } /* * This is NEVER supposed to be called. Inotify marks should either have been * removed from the idr when the watch was removed or in the * fsnotify_destroy_mark_by_group() call when the inotify instance was being * torn down. This is only called if the idr is about to be freed but there * are still marks in it. */ static int idr_callback(int id, void *p, void *data) { struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; static bool warned = false; if (warned) return 0; warned = true; fsn_mark = p; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in " "idr. Probably leaking memory\n", id, p, data); /* * I'm taking the liberty of assuming that the mark in question is a * valid address and I'm dereferencing it. This might help to figure * out why we got here and the panic is no worse than the original * BUG() that was here. */ if (fsn_mark) printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", fsn_mark->group, i_mark->wd); return 0; } static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); if (group->inotify_data.ucounts) dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } /* ding dong the mark is dead */ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) { struct inotify_inode_mark *i_mark; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); kmem_cache_free(inotify_inode_mark_cachep, i_mark); } const struct fsnotify_ops inotify_fsnotify_ops = { .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, .free_mark = inotify_free_mark, };
3 1 2 1 2 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 // SPDX-License-Identifier: GPL-2.0-only /* * event_inode.c - part of tracefs, a pseudo file system for activating tracing * * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> * * eventfs is used to dynamically create inodes and dentries based on the * meta data provided by the tracing system. * * eventfs stores the meta-data of files/dirs and holds off on creating * inodes/dentries of the files. When accessed, the eventfs will create the * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up * and delete the inodes/dentries when they are no longer referenced. */ #include <linux/fsnotify.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/tracefs.h> #include <linux/kref.h> #include <linux/delay.h> #include "internal.h" /* * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access * to the ei->dentry must be done under this mutex and after checking * if ei->is_freed is not set. When ei->is_freed is set, the dentry * is on its way to being freed after the last dput() is made on it. */ static DEFINE_MUTEX(eventfs_mutex); /* Choose something "unique" ;-) */ #define EVENTFS_FILE_INODE_INO 0x12c4e37 struct eventfs_root_inode { struct eventfs_inode ei; struct dentry *events_dir; }; static struct eventfs_root_inode *get_root_inode(struct eventfs_inode *ei) { WARN_ON_ONCE(!ei->is_events); return container_of(ei, struct eventfs_root_inode, ei); } /* Just try to make something consistent and unique */ static int eventfs_dir_ino(struct eventfs_inode *ei) { if (!ei->ino) { ei->ino = get_next_ino(); /* Must not have the file inode number */ if (ei->ino == EVENTFS_FILE_INODE_INO) ei->ino = get_next_ino(); } return ei->ino; } /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from * its parent's list and will have is_freed set (under eventfs_mutex). * After the SRCU grace period is over and the last dput() is called * the ei is freed. */ DEFINE_STATIC_SRCU(eventfs_srcu); /* Mode is unsigned short, use the upper bits for flags */ enum { EVENTFS_SAVE_MODE = BIT(16), EVENTFS_SAVE_UID = BIT(17), EVENTFS_SAVE_GID = BIT(18), }; #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) static void free_ei_rcu(struct rcu_head *rcu) { struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu); struct eventfs_root_inode *rei; kfree(ei->entry_attrs); kfree_const(ei->name); if (ei->is_events) { rei = get_root_inode(ei); kfree(rei); } else { kfree(ei); } } /* * eventfs_inode reference count management. * * NOTE! We count only references from dentries, in the * form 'dentry->d_fsdata'. There are also references from * directory inodes ('ti->private'), but the dentry reference * count is always a superset of the inode reference count. */ static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); const struct eventfs_entry *entry; WARN_ON_ONCE(!ei->is_freed); for (int i = 0; i < ei->nr_entries; i++) { entry = &ei->entries[i]; if (entry->release) entry->release(entry->name, ei->data); } call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) { if (ei) kref_put(&ei->kref, release_ei); } static inline void free_ei(struct eventfs_inode *ei) { if (ei) { ei->is_freed = 1; put_ei(ei); } } /* * Called when creation of an ei fails, do not call release() functions. */ static inline void cleanup_ei(struct eventfs_inode *ei) { if (ei) { /* Set nr_entries to 0 to prevent release() function being called */ ei->nr_entries = 0; free_ei(ei); } } static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) kref_get(&ei->kref); return ei; } static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); static int eventfs_iterate(struct file *file, struct dir_context *ctx); static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) { unsigned int ia_valid = iattr->ia_valid; if (ia_valid & ATTR_MODE) { attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | (iattr->ia_mode & EVENTFS_MODE_MASK) | EVENTFS_SAVE_MODE; } if (ia_valid & ATTR_UID) { attr->mode |= EVENTFS_SAVE_UID; attr->uid = iattr->ia_uid; } if (ia_valid & ATTR_GID) { attr->mode |= EVENTFS_SAVE_GID; attr->gid = iattr->ia_gid; } } static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { const struct eventfs_entry *entry; struct eventfs_inode *ei; const char *name; int ret; mutex_lock(&eventfs_mutex); ei = dentry->d_fsdata; if (ei->is_freed) { /* Do not allow changes if the event is about to be removed. */ mutex_unlock(&eventfs_mutex); return -ENODEV; } /* Preallocate the children mode array if necessary */ if (!(dentry->d_inode->i_mode & S_IFDIR)) { if (!ei->entry_attrs) { ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), GFP_NOFS); if (!ei->entry_attrs) { ret = -ENOMEM; goto out; } } } ret = simple_setattr(idmap, dentry, iattr); if (ret < 0) goto out; /* * If this is a dir, then update the ei cache, only the file * mode is saved in the ei->m_children, and the ownership is * determined by the parent directory. */ if (dentry->d_inode->i_mode & S_IFDIR) { /* Just use the inode permissions for the events directory */ if (!ei->is_events) update_attr(&ei->attr, iattr); } else { name = dentry->d_name.name; for (int i = 0; i < ei->nr_entries; i++) { entry = &ei->entries[i]; if (strcmp(name, entry->name) == 0) { update_attr(&ei->entry_attrs[i], iattr); break; } } } out: mutex_unlock(&eventfs_mutex); return ret; } static const struct inode_operations eventfs_dir_inode_operations = { .lookup = eventfs_root_lookup, .setattr = eventfs_set_attr, }; static const struct inode_operations eventfs_file_inode_operations = { .setattr = eventfs_set_attr, }; static const struct file_operations eventfs_file_operations = { .read = generic_read_dir, .iterate_shared = eventfs_iterate, .llseek = generic_file_llseek, }; static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid, bool update_gid, kgid_t gid, int level) { struct eventfs_inode *ei_child; /* Update events/<system>/<event> */ if (WARN_ON_ONCE(level > 3)) return; if (update_uid) { ei->attr.mode &= ~EVENTFS_SAVE_UID; ei->attr.uid = uid; } if (update_gid) { ei->attr.mode &= ~EVENTFS_SAVE_GID; ei->attr.gid = gid; } list_for_each_entry(ei_child, &ei->children, list) { eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1); } if (!ei->entry_attrs) return; for (int i = 0; i < ei->nr_entries; i++) { if (update_uid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; ei->entry_attrs[i].uid = uid; } if (update_gid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; ei->entry_attrs[i].gid = gid; } } } /* * On a remount of tracefs, if UID or GID options are set, then * the mount point inode permissions should be used. * Reset the saved permission flags appropriately. */ void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) { struct eventfs_inode *ei = ti->private; /* Only the events directory does the updates */ if (!ei || !ei->is_events || ei->is_freed) return; eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid, update_gid, ti->vfs_inode.i_gid, 0); } static void update_inode_attr(struct inode *inode, umode_t mode, struct eventfs_attr *attr, struct eventfs_root_inode *rei) { if (attr && attr->mode & EVENTFS_SAVE_MODE) inode->i_mode = attr->mode & EVENTFS_MODE_MASK; else inode->i_mode = mode; if (attr && attr->mode & EVENTFS_SAVE_UID) inode->i_uid = attr->uid; else inode->i_uid = rei->ei.attr.uid; if (attr && attr->mode & EVENTFS_SAVE_GID) inode->i_gid = attr->gid; else inode->i_gid = rei->ei.attr.gid; } static struct inode *eventfs_get_inode(struct dentry *dentry, struct eventfs_attr *attr, umode_t mode, struct eventfs_inode *ei) { struct eventfs_root_inode *rei; struct eventfs_inode *pei; struct tracefs_inode *ti; struct inode *inode; inode = tracefs_get_inode(dentry->d_sb); if (!inode) return NULL; ti = get_tracefs(inode); ti->private = ei; ti->flags |= TRACEFS_EVENT_INODE; /* Find the top dentry that holds the "events" directory */ do { dentry = dentry->d_parent; /* Directories always have d_fsdata */ pei = dentry->d_fsdata; } while (!pei->is_events); rei = get_root_inode(pei); update_inode_attr(inode, mode, attr, rei); return inode; } /** * lookup_file - look up a file in the tracefs filesystem * @parent_ei: Pointer to the eventfs_inode that represents parent of the file * @dentry: the dentry to look up * @mode: the permission that the file should have. * @attr: saved attributes changed by user * @data: something that the caller will want to get to later on. * @fop: struct file_operations that should be used for this file. * * This function creates a dentry that represents a file in the eventsfs_inode * directory. The inode.i_private pointer will point to @data in the open() * call. */ static struct dentry *lookup_file(struct eventfs_inode *parent_ei, struct dentry *dentry, umode_t mode, struct eventfs_attr *attr, void *data, const struct file_operations *fop) { struct inode *inode; if (!(mode & S_IFMT)) mode |= S_IFREG; if (WARN_ON_ONCE(!S_ISREG(mode))) return ERR_PTR(-EIO); /* Only directories have ti->private set to an ei, not files */ inode = eventfs_get_inode(dentry, attr, mode, NULL); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); inode->i_op = &eventfs_file_inode_operations; inode->i_fop = fop; inode->i_private = data; /* All files will have the same inode number */ inode->i_ino = EVENTFS_FILE_INODE_INO; // Files have their parent's ei as their fsdata dentry->d_fsdata = get_ei(parent_ei); d_add(dentry, inode); return NULL; }; /** * lookup_dir_entry - look up a dir in the tracefs filesystem * @dentry: the directory to look up * @pei: Pointer to the parent eventfs_inode if available * @ei: the eventfs_inode that represents the directory to create * * This function will look up a dentry for a directory represented by * a eventfs_inode. */ static struct dentry *lookup_dir_entry(struct dentry *dentry, struct eventfs_inode *pei, struct eventfs_inode *ei) { struct inode *inode; umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode = eventfs_get_inode(dentry, &ei->attr, mode, ei); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; /* All directories will have the same inode number */ inode->i_ino = eventfs_dir_ino(ei); dentry->d_fsdata = get_ei(ei); d_add(dentry, inode); return NULL; } static inline struct eventfs_inode *init_ei(struct eventfs_inode *ei, const char *name) { ei->name = kstrdup_const(name, GFP_KERNEL); if (!ei->name) return NULL; kref_init(&ei->kref); return ei; } static inline struct eventfs_inode *alloc_ei(const char *name) { struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); struct eventfs_inode *result; if (!ei) return NULL; result = init_ei(ei, name); if (!result) kfree(ei); return result; } static inline struct eventfs_inode *alloc_root_ei(const char *name) { struct eventfs_root_inode *rei = kzalloc(sizeof(*rei), GFP_KERNEL); struct eventfs_inode *ei; if (!rei) return NULL; rei->ei.is_events = 1; ei = init_ei(&rei->ei, name); if (!ei) kfree(rei); return ei; } /** * eventfs_d_release - dentry is going away * @dentry: dentry which has the reference to remove. * * Remove the association between a dentry from an eventfs_inode. */ void eventfs_d_release(struct dentry *dentry) { put_ei(dentry->d_fsdata); } /** * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @dentry: The parent dentry under which the new file's dentry will be created * @ei: the eventfs_inode that the file will be created under * @idx: the index into the entry_attrs[] of the @ei * @mode: The mode of the file. * @data: The data to use to set the inode of the file with on open() * @fops: The fops of the file to be created. * * This function creates a dentry for a file associated with an * eventfs_inode @ei. It uses the entry attributes specified by @idx, * if available. The file will have the specified @mode and its inode will be * set up with @data upon open. The file operations will be set to @fops. * * Return: Returns a pointer to the newly created file's dentry or an error * pointer. */ static struct dentry * lookup_file_dentry(struct dentry *dentry, struct eventfs_inode *ei, int idx, umode_t mode, void *data, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; return lookup_file(ei, dentry, mode, attr, data, fops); } /** * eventfs_root_lookup - lookup routine to create file/dir * @dir: in which a lookup is being done * @dentry: file/dir dentry * @flags: Just passed to simple_lookup() * * Used to create dynamic file/dir with-in @dir, search with-in @ei * list, if @dentry found go ahead and create the file/dir */ static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; const char *name = dentry->d_name.name; struct dentry *result = NULL; ti = get_tracefs(dir); if (WARN_ON_ONCE(!(ti->flags & TRACEFS_EVENT_INODE))) return ERR_PTR(-EIO); mutex_lock(&eventfs_mutex); ei = ti->private; if (!ei || ei->is_freed) goto out; list_for_each_entry(ei_child, &ei->children, list) { if (strcmp(ei_child->name, name) != 0) continue; /* A child is freed and removed from the list at the same time */ if (WARN_ON_ONCE(ei_child->is_freed)) goto out; result = lookup_dir_entry(dentry, ei, ei_child); goto out; } for (int i = 0; i < ei->nr_entries; i++) { void *data; umode_t mode; const struct file_operations *fops; const struct eventfs_entry *entry = &ei->entries[i]; if (strcmp(name, entry->name) != 0) continue; data = ei->data; if (entry->callback(name, &mode, &data, &fops) <= 0) goto out; result = lookup_file_dentry(dentry, ei, i, mode, data, fops); goto out; } out: mutex_unlock(&eventfs_mutex); return result; } /* * Walk the children of a eventfs_inode to fill in getdents(). */ static int eventfs_iterate(struct file *file, struct dir_context *ctx) { const struct file_operations *fops; struct inode *f_inode = file_inode(file); const struct eventfs_entry *entry; struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; const char *name; umode_t mode; int idx; int ret = -EINVAL; int ino; int i, r, c; if (!dir_emit_dots(file, ctx)) return 0; ti = get_tracefs(f_inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; c = ctx->pos - 2; idx = srcu_read_lock(&eventfs_srcu); mutex_lock(&eventfs_mutex); ei = READ_ONCE(ti->private); if (ei && ei->is_freed) ei = NULL; mutex_unlock(&eventfs_mutex); if (!ei) goto out; /* * Need to create the dentries and inodes to have a consistent * inode number. */ ret = 0; /* Start at 'c' to jump over already read entries */ for (i = c; i < ei->nr_entries; i++, ctx->pos++) { void *cdata = ei->data; entry = &ei->entries[i]; name = entry->name; mutex_lock(&eventfs_mutex); /* If ei->is_freed then just bail here, nothing more to do */ if (ei->is_freed) { mutex_unlock(&eventfs_mutex); goto out; } r = entry->callback(name, &mode, &cdata, &fops); mutex_unlock(&eventfs_mutex); if (r <= 0) continue; ino = EVENTFS_FILE_INODE_INO; if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) goto out; } /* Subtract the skipped entries above */ c -= min((unsigned int)c, (unsigned int)ei->nr_entries); list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { if (c > 0) { c--; continue; } ctx->pos++; if (ei_child->is_freed) continue; name = ei_child->name; ino = eventfs_dir_ino(ei_child); if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) goto out_dec; } ret = 1; out: srcu_read_unlock(&eventfs_srcu, idx); return ret; out_dec: /* Incremented ctx->pos without adding something, reset it */ ctx->pos--; goto out; } /** * eventfs_create_dir - Create the eventfs_inode for this directory * @name: The name of the directory to create. * @parent: The eventfs_inode of the parent directory. * @entries: A list of entries that represent the files under this directory * @size: The number of @entries * @data: The default data to pass to the files (an entry may override it). * * This function creates the descriptor to represent a directory in the * eventfs. This descriptor is an eventfs_inode, and it is returned to be * used to create other children underneath. * * The @entries is an array of eventfs_entry structures which has: * const char *name * eventfs_callback callback; * * The name is the name of the file, and the callback is a pointer to a function * that will be called when the file is reference (either by lookup or by * reading a directory). The callback is of the prototype: * * int callback(const char *name, umode_t *mode, void **data, * const struct file_operations **fops); * * When a file needs to be created, this callback will be called with * name = the name of the file being created (so that the same callback * may be used for multiple files). * mode = a place to set the file's mode * data = A pointer to @data, and the callback may replace it, which will * cause the file created to pass the new data to the open() call. * fops = the fops to use for the created file. * * NB. @callback is called while holding internal locks of the eventfs * system. The callback must not call any code that might also call into * the tracefs or eventfs system or it will risk creating a deadlock. */ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, const struct eventfs_entry *entries, int size, void *data) { struct eventfs_inode *ei; if (!parent) return ERR_PTR(-EINVAL); ei = alloc_ei(name); if (!ei) return ERR_PTR(-ENOMEM); ei->entries = entries; ei->nr_entries = size; ei->data = data; INIT_LIST_HEAD(&ei->children); INIT_LIST_HEAD(&ei->list); mutex_lock(&eventfs_mutex); if (!parent->is_freed) list_add_tail(&ei->list, &parent->children); mutex_unlock(&eventfs_mutex); /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); ei = ERR_PTR(-EBUSY); } return ei; } /** * eventfs_create_events_dir - create the top level events directory * @name: The name of the top level directory to create. * @parent: Parent dentry for this file in the tracefs directory. * @entries: A list of entries that represent the files under this directory * @size: The number of @entries * @data: The default data to pass to the files (an entry may override it). * * This function creates the top of the trace event directory. * * See eventfs_create_dir() for use of @entries. */ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, const struct eventfs_entry *entries, int size, void *data) { struct dentry *dentry = tracefs_start_creating(name, parent); struct eventfs_root_inode *rei; struct eventfs_inode *ei; struct tracefs_inode *ti; struct inode *inode; kuid_t uid; kgid_t gid; if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; if (IS_ERR(dentry)) return ERR_CAST(dentry); ei = alloc_root_ei(name); if (!ei) goto fail; inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) goto fail; // Note: we have a ref to the dentry from tracefs_start_creating() rei = get_root_inode(ei); rei->events_dir = dentry; ei->entries = entries; ei->nr_entries = size; ei->data = data; /* Save the ownership of this directory */ uid = d_inode(dentry->d_parent)->i_uid; gid = d_inode(dentry->d_parent)->i_gid; /* * The ei->attr will be used as the default values for the * files beneath this directory. */ ei->attr.uid = uid; ei->attr.gid = gid; INIT_LIST_HEAD(&ei->children); INIT_LIST_HEAD(&ei->list); ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; ti->private = ei; inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_uid = uid; inode->i_gid = gid; inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; dentry->d_fsdata = get_ei(ei); /* * Keep all eventfs directories with i_nlink == 1. * Due to the dynamic nature of the dentry creations and not * wanting to add a pointer to the parent eventfs_inode in the * eventfs_inode structure, keeping the i_nlink in sync with the * number of directories would cause too much complexity for * something not worth much. Keeping directory links at 1 * tells userspace not to trust the link number. */ d_instantiate(dentry, inode); /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); tracefs_end_creating(dentry); return ei; fail: cleanup_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } /** * eventfs_remove_rec - remove eventfs dir or file from list * @ei: eventfs_inode to be removed. * @level: prevent recursion from going more than 3 levels deep. * * This function recursively removes eventfs_inodes which * contains info of files and/or directories. */ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) { struct eventfs_inode *ei_child; /* * Check recursion depth. It should never be greater than 3: * 0 - events/ * 1 - events/group/ * 2 - events/group/event/ * 3 - events/group/event/file */ if (WARN_ON_ONCE(level > 3)) return; /* search for nested folders or files */ list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); list_del_rcu(&ei->list); free_ei(ei); } /** * eventfs_remove_dir - remove eventfs dir or file from list * @ei: eventfs_inode to be removed. * * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() */ void eventfs_remove_dir(struct eventfs_inode *ei) { if (!ei) return; mutex_lock(&eventfs_mutex); eventfs_remove_rec(ei, 0); mutex_unlock(&eventfs_mutex); } /** * eventfs_remove_events_dir - remove the top level eventfs directory * @ei: the event_inode returned by eventfs_create_events_dir(). * * This function removes the events main directory */ void eventfs_remove_events_dir(struct eventfs_inode *ei) { struct eventfs_root_inode *rei; struct dentry *dentry; rei = get_root_inode(ei); dentry = rei->events_dir; if (!dentry) return; rei->events_dir = NULL; eventfs_remove_dir(ei); /* * Matches the dget() done by tracefs_start_creating() * in eventfs_create_events_dir() when it the dentry was * created. In other words, it's a normal dentry that * sticks around while the other ei->dentry are created * and destroyed dynamically. */ d_invalidate(dentry); dput(dentry); }
10 45 38 35 37 38 38 36 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 // SPDX-License-Identifier: GPL-2.0-or-later /* * acpi_bus.c - ACPI Bus Driver ($Revision: 80 $) * * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #define pr_fmt(fmt) "ACPI: " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/sched.h> #include <linux/pm.h> #include <linux/device.h> #include <linux/proc_fs.h> #include <linux/acpi.h> #include <linux/slab.h> #include <linux/regulator/machine.h> #include <linux/workqueue.h> #include <linux/reboot.h> #include <linux/delay.h> #ifdef CONFIG_X86 #include <asm/mpspec.h> #include <linux/dmi.h> #endif #include <linux/acpi_viot.h> #include <linux/pci.h> #include <acpi/apei.h> #include <linux/suspend.h> #include <linux/prmt.h> #include "internal.h" struct acpi_device *acpi_root; struct proc_dir_entry *acpi_root_dir; EXPORT_SYMBOL(acpi_root_dir); #ifdef CONFIG_X86 #ifdef CONFIG_ACPI_CUSTOM_DSDT static inline int set_copy_dsdt(const struct dmi_system_id *id) { return 0; } #else static int set_copy_dsdt(const struct dmi_system_id *id) { pr_notice("%s detected - force copy of DSDT to local memory\n", id->ident); acpi_gbl_copy_dsdt_locally = 1; return 0; } #endif static const struct dmi_system_id dsdt_dmi_table[] __initconst = { /* * Invoke DSDT corruption work-around on all Toshiba Satellite. * https://bugzilla.kernel.org/show_bug.cgi?id=14679 */ { .callback = set_copy_dsdt, .ident = "TOSHIBA Satellite", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), DMI_MATCH(DMI_PRODUCT_NAME, "Satellite"), }, }, {} }; #endif /* -------------------------------------------------------------------------- Device Management -------------------------------------------------------------------------- */ acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta) { acpi_status status; status = acpi_evaluate_integer(handle, "_STA", NULL, sta); if (ACPI_SUCCESS(status)) return AE_OK; if (status == AE_NOT_FOUND) { *sta = ACPI_STA_DEVICE_PRESENT | ACPI_STA_DEVICE_ENABLED | ACPI_STA_DEVICE_UI | ACPI_STA_DEVICE_FUNCTIONING; return AE_OK; } return status; } EXPORT_SYMBOL_GPL(acpi_bus_get_status_handle); int acpi_bus_get_status(struct acpi_device *device) { acpi_status status; unsigned long long sta; if (acpi_device_override_status(device, &sta)) { acpi_set_device_status(device, sta); return 0; } /* Battery devices must have their deps met before calling _STA */ if (acpi_device_is_battery(device) && device->dep_unmet) { acpi_set_device_status(device, 0); return 0; } status = acpi_bus_get_status_handle(device->handle, &sta); if (ACPI_FAILURE(status)) return -ENODEV; if (!device->status.present && device->status.enabled) { pr_info(FW_BUG "Device [%s] status [%08x]: not present and enabled\n", device->pnp.bus_id, (u32)sta); device->status.enabled = 0; /* * The status is clearly invalid, so clear the functional bit as * well to avoid attempting to use the device. */ device->status.functional = 0; } acpi_set_device_status(device, sta); if (device->status.functional && !device->status.present) { pr_debug("Device [%s] status [%08x]: functional but not present\n", device->pnp.bus_id, (u32)sta); } pr_debug("Device [%s] status [%08x]\n", device->pnp.bus_id, (u32)sta); return 0; } EXPORT_SYMBOL(acpi_bus_get_status); void acpi_bus_private_data_handler(acpi_handle handle, void *context) { return; } EXPORT_SYMBOL(acpi_bus_private_data_handler); int acpi_bus_attach_private_data(acpi_handle handle, void *data) { acpi_status status; status = acpi_attach_data(handle, acpi_bus_private_data_handler, data); if (ACPI_FAILURE(status)) { acpi_handle_debug(handle, "Error attaching device data\n"); return -ENODEV; } return 0; } EXPORT_SYMBOL_GPL(acpi_bus_attach_private_data); int acpi_bus_get_private_data(acpi_handle handle, void **data) { acpi_status status; if (!data) return -EINVAL; status = acpi_get_data(handle, acpi_bus_private_data_handler, data); if (ACPI_FAILURE(status)) { acpi_handle_debug(handle, "No context for object\n"); return -ENODEV; } return 0; } EXPORT_SYMBOL_GPL(acpi_bus_get_private_data); void acpi_bus_detach_private_data(acpi_handle handle) { acpi_detach_data(handle, acpi_bus_private_data_handler); } EXPORT_SYMBOL_GPL(acpi_bus_detach_private_data); static void acpi_print_osc_error(acpi_handle handle, struct acpi_osc_context *context, char *error) { int i; acpi_handle_debug(handle, "(%s): %s\n", context->uuid_str, error); pr_debug("_OSC request data:"); for (i = 0; i < context->cap.length; i += sizeof(u32)) pr_debug(" %x", *((u32 *)(context->cap.pointer + i))); pr_debug("\n"); } acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) { acpi_status status; struct acpi_object_list input; union acpi_object in_params[4]; union acpi_object *out_obj; guid_t guid; u32 errors; struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; if (!context) return AE_ERROR; if (guid_parse(context->uuid_str, &guid)) return AE_ERROR; context->ret.length = ACPI_ALLOCATE_BUFFER; context->ret.pointer = NULL; /* Setting up input parameters */ input.count = 4; input.pointer = in_params; in_params[0].type = ACPI_TYPE_BUFFER; in_params[0].buffer.length = 16; in_params[0].buffer.pointer = (u8 *)&guid; in_params[1].type = ACPI_TYPE_INTEGER; in_params[1].integer.value = context->rev; in_params[2].type = ACPI_TYPE_INTEGER; in_params[2].integer.value = context->cap.length/sizeof(u32); in_params[3].type = ACPI_TYPE_BUFFER; in_params[3].buffer.length = context->cap.length; in_params[3].buffer.pointer = context->cap.pointer; status = acpi_evaluate_object(handle, "_OSC", &input, &output); if (ACPI_FAILURE(status)) return status; if (!output.length) return AE_NULL_OBJECT; out_obj = output.pointer; if (out_obj->type != ACPI_TYPE_BUFFER || out_obj->buffer.length != context->cap.length) { acpi_print_osc_error(handle, context, "_OSC evaluation returned wrong type"); status = AE_TYPE; goto out_kfree; } /* Need to ignore the bit0 in result code */ errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); if (errors) { if (errors & OSC_REQUEST_ERROR) acpi_print_osc_error(handle, context, "_OSC request failed"); if (errors & OSC_INVALID_UUID_ERROR) acpi_print_osc_error(handle, context, "_OSC invalid UUID"); if (errors & OSC_INVALID_REVISION_ERROR) acpi_print_osc_error(handle, context, "_OSC invalid revision"); if (errors & OSC_CAPABILITIES_MASK_ERROR) { if (((u32 *)context->cap.pointer)[OSC_QUERY_DWORD] & OSC_QUERY_ENABLE) goto out_success; status = AE_SUPPORT; goto out_kfree; } status = AE_ERROR; goto out_kfree; } out_success: context->ret.length = out_obj->buffer.length; context->ret.pointer = kmemdup(out_obj->buffer.pointer, context->ret.length, GFP_KERNEL); if (!context->ret.pointer) { status = AE_NO_MEMORY; goto out_kfree; } status = AE_OK; out_kfree: kfree(output.pointer); return status; } EXPORT_SYMBOL(acpi_run_osc); bool osc_sb_apei_support_acked; /* * ACPI 6.0 Section 8.4.4.2 Idle State Coordination * OSPM supports platform coordinated low power idle(LPI) states */ bool osc_pc_lpi_support_confirmed; EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed); /* * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities': * Starting with ACPI Specification 6.2, all _CPC registers can be in * PCC, System Memory, System IO, or Functional Fixed Hardware address * spaces. OSPM support for this more flexible register space scheme is * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit. * * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in: * - PCC or Functional Fixed Hardware address space if defined * - SystemMemory address space (NULL register) if not defined */ bool osc_cpc_flexible_adr_space_confirmed; EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); /* * ACPI 6.4 Operating System Capabilities for USB. */ bool osc_sb_native_usb4_support_confirmed; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed); bool osc_sb_cppc2_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_negotiate_platform_control(void) { u32 capbuf[2], *capbuf_ret; struct acpi_osc_context context = { .uuid_str = sb_uuid_str, .rev = 1, .cap.length = 8, .cap.pointer = capbuf, }; acpi_handle handle; capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_DWORD] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */ if (IS_ENABLED(CONFIG_ACPI_PROCESSOR_AGGREGATOR)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PAD_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_PROCESSOR)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_THERMAL)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_FAST_THERMAL_SAMPLING_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_BATTERY)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_BATTERY_CHARGE_LIMITING_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_OVER_16_PSTATES_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GED_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_IRQ_RESOURCE_SOURCE_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_PRMT)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PRM_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_FFH)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_FFH_OPR_SUPPORT; #ifdef CONFIG_ARM64 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; #endif #ifdef CONFIG_X86 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; #endif #ifdef CONFIG_ACPI_CPPC_LIB capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; #endif capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE; if (IS_ENABLED(CONFIG_SCHED_MC_PRIO)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT; if (IS_ENABLED(CONFIG_USB4)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_NATIVE_USB4_SUPPORT; if (!ghes_disable) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; if (context.ret.length <= OSC_SUPPORT_DWORD) { kfree(context.ret.pointer); return; } /* * Now run _OSC again with query flag clear and with the caps * supported by both the OS and the platform. */ capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD]; kfree(context.ret.pointer); if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; if (context.ret.length > OSC_SUPPORT_DWORD) { #ifdef CONFIG_ACPI_CPPC_LIB osc_sb_cppc2_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPCV2_SUPPORT; #endif osc_sb_apei_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; osc_pc_lpi_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; osc_sb_native_usb4_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; osc_cpc_flexible_adr_space_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE; } kfree(context.ret.pointer); } /* * Native control of USB4 capabilities. If any of the tunneling bits is * set it means OS is in control and we use software based connection * manager. */ u32 osc_sb_native_usb4_control; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_control); static void acpi_bus_decode_usb_osc(const char *msg, u32 bits) { pr_info("%s USB3%c DisplayPort%c PCIe%c XDomain%c\n", msg, (bits & OSC_USB_USB3_TUNNELING) ? '+' : '-', (bits & OSC_USB_DP_TUNNELING) ? '+' : '-', (bits & OSC_USB_PCIE_TUNNELING) ? '+' : '-', (bits & OSC_USB_XDOMAIN) ? '+' : '-'); } static u8 sb_usb_uuid_str[] = "23A0D13A-26AB-486C-9C5F-0FFA525A575A"; static void acpi_bus_osc_negotiate_usb_control(void) { u32 capbuf[3], *capbuf_ret; struct acpi_osc_context context = { .uuid_str = sb_usb_uuid_str, .rev = 1, .cap.length = sizeof(capbuf), .cap.pointer = capbuf, }; acpi_handle handle; acpi_status status; u32 control; if (!osc_sb_native_usb4_support_confirmed) return; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; control = OSC_USB_USB3_TUNNELING | OSC_USB_DP_TUNNELING | OSC_USB_PCIE_TUNNELING | OSC_USB_XDOMAIN; /* * Run _OSC first with query bit set, trying to get control over * all tunneling. The platform can then clear out bits in the * control dword that it does not want to grant to the OS. */ capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_DWORD] = 0; capbuf[OSC_CONTROL_DWORD] = control; status = acpi_run_osc(handle, &context); if (ACPI_FAILURE(status)) return; if (context.ret.length != sizeof(capbuf)) { pr_info("USB4 _OSC: returned invalid length buffer\n"); goto out_free; } /* * Run _OSC again now with query bit clear and the control dword * matching what the platform granted (which may not have all * the control bits set). */ capbuf_ret = context.ret.pointer; capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_CONTROL_DWORD] = capbuf_ret[OSC_CONTROL_DWORD]; kfree(context.ret.pointer); status = acpi_run_osc(handle, &context); if (ACPI_FAILURE(status)) return; if (context.ret.length != sizeof(capbuf)) { pr_info("USB4 _OSC: returned invalid length buffer\n"); goto out_free; } osc_sb_native_usb4_control = control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", osc_sb_native_usb4_control); out_free: kfree(context.ret.pointer); } /* -------------------------------------------------------------------------- Notification Handling -------------------------------------------------------------------------- */ /** * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler * @handle: Target ACPI object. * @type: Notification type. * @data: Ignored. * * This only handles notifications related to device hotplug. */ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) { struct acpi_device *adev; switch (type) { case ACPI_NOTIFY_BUS_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n"); break; case ACPI_NOTIFY_DEVICE_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n"); break; case ACPI_NOTIFY_DEVICE_WAKE: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n"); return; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); break; case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n"); /* TBD: Exactly what does 'light' mean? */ return; case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); return; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode mismatch\n"); return; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); return; default: acpi_handle_debug(handle, "Unknown event type 0x%x\n", type); return; } adev = acpi_get_acpi_dev(handle); if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) return; acpi_put_acpi_dev(adev); acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); } static void acpi_notify_device(acpi_handle handle, u32 event, void *data) { struct acpi_device *device = data; struct acpi_driver *acpi_drv = to_acpi_driver(device->dev.driver); acpi_drv->ops.notify(device, event); } static int acpi_device_install_notify_handler(struct acpi_device *device, struct acpi_driver *acpi_drv) { u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; acpi_status status; status = acpi_install_notify_handler(device->handle, type, acpi_notify_device, device); if (ACPI_FAILURE(status)) return -EINVAL; return 0; } static void acpi_device_remove_notify_handler(struct acpi_device *device, struct acpi_driver *acpi_drv) { u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; acpi_remove_notify_handler(device->handle, type, acpi_notify_device); acpi_os_wait_events_complete(); } int acpi_dev_install_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler, void *context) { acpi_status status; status = acpi_install_notify_handler(adev->handle, handler_type, handler, context); if (ACPI_FAILURE(status)) return -ENODEV; return 0; } EXPORT_SYMBOL_GPL(acpi_dev_install_notify_handler); void acpi_dev_remove_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler) { acpi_remove_notify_handler(adev->handle, handler_type, handler); acpi_os_wait_events_complete(); } EXPORT_SYMBOL_GPL(acpi_dev_remove_notify_handler); /* Handle events targeting \_SB device (at present only graceful shutdown) */ #define ACPI_SB_NOTIFY_SHUTDOWN_REQUEST 0x81 #define ACPI_SB_INDICATE_INTERVAL 10000 static void sb_notify_work(struct work_struct *dummy) { acpi_handle sb_handle; orderly_poweroff(true); /* * After initiating graceful shutdown, the ACPI spec requires OSPM * to evaluate _OST method once every 10seconds to indicate that * the shutdown is in progress */ acpi_get_handle(NULL, "\\_SB", &sb_handle); while (1) { pr_info("Graceful shutdown in progress.\n"); acpi_evaluate_ost(sb_handle, ACPI_OST_EC_OSPM_SHUTDOWN, ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS, NULL); msleep(ACPI_SB_INDICATE_INTERVAL); } } static void acpi_sb_notify(acpi_handle handle, u32 event, void *data) { static DECLARE_WORK(acpi_sb_work, sb_notify_work); if (event == ACPI_SB_NOTIFY_SHUTDOWN_REQUEST) { if (!work_busy(&acpi_sb_work)) schedule_work(&acpi_sb_work); } else { pr_warn("event %x is not supported by \\_SB device\n", event); } } static int __init acpi_setup_sb_notify_handler(void) { acpi_handle sb_handle; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &sb_handle))) return -ENXIO; if (ACPI_FAILURE(acpi_install_notify_handler(sb_handle, ACPI_DEVICE_NOTIFY, acpi_sb_notify, NULL))) return -EINVAL; return 0; } /* -------------------------------------------------------------------------- Device Matching -------------------------------------------------------------------------- */ /** * acpi_get_first_physical_node - Get first physical node of an ACPI device * @adev: ACPI device in question * * Return: First physical node of ACPI device @adev */ struct device *acpi_get_first_physical_node(struct acpi_device *adev) { struct mutex *physical_node_lock = &adev->physical_node_lock; struct device *phys_dev; mutex_lock(physical_node_lock); if (list_empty(&adev->physical_node_list)) { phys_dev = NULL; } else { const struct acpi_device_physical_node *node; node = list_first_entry(&adev->physical_node_list, struct acpi_device_physical_node, node); phys_dev = node->dev; } mutex_unlock(physical_node_lock); return phys_dev; } EXPORT_SYMBOL_GPL(acpi_get_first_physical_node); static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, const struct device *dev) { const struct device *phys_dev = acpi_get_first_physical_node(adev); return phys_dev && phys_dev == dev ? adev : NULL; } /** * acpi_device_is_first_physical_node - Is given dev first physical node * @adev: ACPI companion device * @dev: Physical device to check * * Function checks if given @dev is the first physical devices attached to * the ACPI companion device. This distinction is needed in some cases * where the same companion device is shared between many physical devices. * * Note that the caller have to provide valid @adev pointer. */ bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev) { return !!acpi_primary_dev_companion(adev, dev); } /* * acpi_companion_match() - Can we match via ACPI companion device * @dev: Device in question * * Check if the given device has an ACPI companion and if that companion has * a valid list of PNP IDs, and if the device is the first (primary) physical * device associated with it. Return the companion pointer if that's the case * or NULL otherwise. * * If multiple physical devices are attached to a single ACPI companion, we need * to be careful. The usage scenario for this kind of relationship is that all * of the physical devices in question use resources provided by the ACPI * companion. A typical case is an MFD device where all the sub-devices share * the parent's ACPI companion. In such cases we can only allow the primary * (first) physical device to be matched with the help of the companion's PNP * IDs. * * Additional physical devices sharing the ACPI companion can still use * resources available from it but they will be matched normally using functions * provided by their bus types (and analogously for their modalias). */ const struct acpi_device *acpi_companion_match(const struct device *dev) { struct acpi_device *adev; adev = ACPI_COMPANION(dev); if (!adev) return NULL; if (list_empty(&adev->pnp.ids)) return NULL; return acpi_primary_dev_companion(adev, dev); } /** * acpi_of_match_device - Match device object using the "compatible" property. * @adev: ACPI device object to match. * @of_match_table: List of device IDs to match against. * @of_id: OF ID if matched * * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of * identifiers and a _DSD object with the "compatible" property, use that * property to match against the given list of identifiers. */ static bool acpi_of_match_device(const struct acpi_device *adev, const struct of_device_id *of_match_table, const struct of_device_id **of_id) { const union acpi_object *of_compatible, *obj; int i, nval; if (!adev) return false; of_compatible = adev->data.of_compatible; if (!of_match_table || !of_compatible) return false; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } /* Now we can look for the driver DT compatible strings */ for (i = 0; i < nval; i++, obj++) { const struct of_device_id *id; for (id = of_match_table; id->compatible[0]; id++) if (!strcasecmp(obj->string.pointer, id->compatible)) { if (of_id) *of_id = id; return true; } } return false; } static bool acpi_of_modalias(struct acpi_device *adev, char *modalias, size_t len) { const union acpi_object *of_compatible; const union acpi_object *obj; const char *str, *chr; of_compatible = adev->data.of_compatible; if (!of_compatible) return false; if (of_compatible->type == ACPI_TYPE_PACKAGE) obj = of_compatible->package.elements; else /* Must be ACPI_TYPE_STRING. */ obj = of_compatible; str = obj->string.pointer; chr = strchr(str, ','); strscpy(modalias, chr ? chr + 1 : str, len); return true; } /** * acpi_set_modalias - Set modalias using "compatible" property or supplied ID * @adev: ACPI device object to match * @default_id: ID string to use as default if no compatible string found * @modalias: Pointer to buffer that modalias value will be copied into * @len: Length of modalias buffer * * This is a counterpart of of_alias_from_compatible() for struct acpi_device * objects. If there is a compatible string for @adev, it will be copied to * @modalias with the vendor prefix stripped; otherwise, @default_id will be * used. */ void acpi_set_modalias(struct acpi_device *adev, const char *default_id, char *modalias, size_t len) { if (!acpi_of_modalias(adev, modalias, len)) strscpy(modalias, default_id, len); } EXPORT_SYMBOL_GPL(acpi_set_modalias); static bool __acpi_match_device_cls(const struct acpi_device_id *id, struct acpi_hardware_id *hwid) { int i, msk, byte_shift; char buf[3]; if (!id->cls) return false; /* Apply class-code bitmask, before checking each class-code byte */ for (i = 1; i <= 3; i++) { byte_shift = 8 * (3 - i); msk = (id->cls_msk >> byte_shift) & 0xFF; if (!msk) continue; sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) return false; } return true; } static bool __acpi_match_device(const struct acpi_device *device, const struct acpi_device_id *acpi_ids, const struct of_device_id *of_ids, const struct acpi_device_id **acpi_id, const struct of_device_id **of_id) { const struct acpi_device_id *id; struct acpi_hardware_id *hwid; /* * If the device is not present, it is unnecessary to load device * driver for it. */ if (!device || !device->status.present) return false; list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ if (acpi_ids) { for (id = acpi_ids; id->id[0] || id->cls; id++) { if (id->id[0] && !strcmp((char *)id->id, hwid->id)) goto out_acpi_match; if (id->cls && __acpi_match_device_cls(id, hwid)) goto out_acpi_match; } } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the * "compatible" property if found. */ if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)) return acpi_of_match_device(device, of_ids, of_id); } return false; out_acpi_match: if (acpi_id) *acpi_id = id; return true; } /** * acpi_match_acpi_device - Match an ACPI device against a given list of ACPI IDs * @ids: Array of struct acpi_device_id objects to match against. * @adev: The ACPI device pointer to match. * * Match the ACPI device @adev against a given list of ACPI IDs @ids. * * Return: * a pointer to the first matching ACPI ID on success or %NULL on failure. */ const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, const struct acpi_device *adev) { const struct acpi_device_id *id = NULL; __acpi_match_device(adev, ids, NULL, &id, NULL); return id; } EXPORT_SYMBOL_GPL(acpi_match_acpi_device); /** * acpi_match_device - Match a struct device against a given list of ACPI IDs * @ids: Array of struct acpi_device_id object to match against. * @dev: The device structure to match. * * Check if @dev has a valid ACPI handle and if there is a struct acpi_device * object for that handle and use that object to match against a given list of * device IDs. * * Return a pointer to the first matching ID on success or %NULL on failure. */ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev) { return acpi_match_acpi_device(ids, acpi_companion_match(dev)); } EXPORT_SYMBOL_GPL(acpi_match_device); static const void *acpi_of_device_get_match_data(const struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); const struct of_device_id *match = NULL; if (!acpi_of_match_device(adev, dev->driver->of_match_table, &match)) return NULL; return match->data; } const void *acpi_device_get_match_data(const struct device *dev) { const struct acpi_device_id *acpi_ids = dev->driver->acpi_match_table; const struct acpi_device_id *match; if (!acpi_ids) return acpi_of_device_get_match_data(dev); match = acpi_match_device(acpi_ids, dev); if (!match) return NULL; return (const void *)match->driver_data; } EXPORT_SYMBOL_GPL(acpi_device_get_match_data); int acpi_match_device_ids(struct acpi_device *device, const struct acpi_device_id *ids) { return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT; } EXPORT_SYMBOL(acpi_match_device_ids); bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { const struct acpi_device_id *acpi_ids = drv->acpi_match_table; const struct of_device_id *of_ids = drv->of_match_table; if (!acpi_ids) return acpi_of_match_device(ACPI_COMPANION(dev), of_ids, NULL); return __acpi_match_device(acpi_companion_match(dev), acpi_ids, of_ids, NULL, NULL); } EXPORT_SYMBOL_GPL(acpi_driver_match_device); /* -------------------------------------------------------------------------- ACPI Driver Management -------------------------------------------------------------------------- */ /** * __acpi_bus_register_driver - register a driver with the ACPI bus * @driver: driver being registered * @owner: owning module/driver * * Registers a driver with the ACPI bus. Searches the namespace for all * devices that match the driver's criteria and binds. Returns zero for * success or a negative error status for failure. */ int __acpi_bus_register_driver(struct acpi_driver *driver, struct module *owner) { if (acpi_disabled) return -ENODEV; driver->drv.name = driver->name; driver->drv.bus = &acpi_bus_type; driver->drv.owner = owner; return driver_register(&driver->drv); } EXPORT_SYMBOL(__acpi_bus_register_driver); /** * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus * @driver: driver to unregister * * Unregisters a driver with the ACPI bus. Searches the namespace for all * devices that match the driver's criteria and unbinds. */ void acpi_bus_unregister_driver(struct acpi_driver *driver) { driver_unregister(&driver->drv); } EXPORT_SYMBOL(acpi_bus_unregister_driver); /* -------------------------------------------------------------------------- ACPI Bus operations -------------------------------------------------------------------------- */ static int acpi_bus_match(struct device *dev, const struct device_driver *drv) { struct acpi_device *acpi_dev = to_acpi_device(dev); const struct acpi_driver *acpi_drv = to_acpi_driver(drv); return acpi_dev->flags.match_driver && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); } static int acpi_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(to_acpi_device(dev), env); } static int acpi_device_probe(struct device *dev) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); int ret; if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) return -EINVAL; if (!acpi_drv->ops.add) return -ENOSYS; ret = acpi_drv->ops.add(acpi_dev); if (ret) { acpi_dev->driver_data = NULL; return ret; } pr_debug("Driver [%s] successfully bound to device [%s]\n", acpi_drv->name, acpi_dev->pnp.bus_id); if (acpi_drv->ops.notify) { ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv); if (ret) { if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); acpi_dev->driver_data = NULL; return ret; } } pr_debug("Found driver [%s] for device [%s]\n", acpi_drv->name, acpi_dev->pnp.bus_id); get_device(dev); return 0; } static void acpi_device_remove(struct device *dev) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); if (acpi_drv->ops.notify) acpi_device_remove_notify_handler(acpi_dev, acpi_drv); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); acpi_dev->driver_data = NULL; put_device(dev); } const struct bus_type acpi_bus_type = { .name = "acpi", .match = acpi_bus_match, .probe = acpi_device_probe, .remove = acpi_device_remove, .uevent = acpi_device_uevent, }; int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data) { return bus_for_each_dev(&acpi_bus_type, NULL, data, fn); } EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev); struct acpi_dev_walk_context { int (*fn)(struct acpi_device *, void *); void *data; }; static int acpi_dev_for_one_check(struct device *dev, void *context) { struct acpi_dev_walk_context *adwc = context; if (dev->bus != &acpi_bus_type) return 0; return adwc->fn(to_acpi_device(dev), adwc->data); } EXPORT_SYMBOL_GPL(acpi_dev_for_each_child); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data) { struct acpi_dev_walk_context adwc = { .fn = fn, .data = data, }; return device_for_each_child(&adev->dev, &adwc, acpi_dev_for_one_check); } int acpi_dev_for_each_child_reverse(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data) { struct acpi_dev_walk_context adwc = { .fn = fn, .data = data, }; return device_for_each_child_reverse(&adev->dev, &adwc, acpi_dev_for_one_check); } /* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ static int __init acpi_bus_init_irq(void) { acpi_status status; char *message = NULL; /* * Let the system know what interrupt model we are using by * evaluating the \_PIC object, if exists. */ switch (acpi_irq_model) { case ACPI_IRQ_MODEL_PIC: message = "PIC"; break; case ACPI_IRQ_MODEL_IOAPIC: message = "IOAPIC"; break; case ACPI_IRQ_MODEL_IOSAPIC: message = "IOSAPIC"; break; case ACPI_IRQ_MODEL_GIC: message = "GIC"; break; case ACPI_IRQ_MODEL_PLATFORM: message = "platform specific model"; break; case ACPI_IRQ_MODEL_LPIC: message = "LPIC"; break; case ACPI_IRQ_MODEL_RINTC: message = "RINTC"; break; default: pr_info("Unknown interrupt routing model\n"); return -ENODEV; } pr_info("Using %s for interrupt routing\n", message); status = acpi_execute_simple_method(NULL, "\\_PIC", acpi_irq_model); if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { pr_info("_PIC evaluation failed: %s\n", acpi_format_exception(status)); return -ENODEV; } return 0; } /** * acpi_early_init - Initialize ACPICA and populate the ACPI namespace. * * The ACPI tables are accessible after this, but the handling of events has not * been initialized and the global lock is not available yet, so AML should not * be executed at this point. * * Doing this before switching the EFI runtime services to virtual mode allows * the EfiBootServices memory to be freed slightly earlier on boot. */ void __init acpi_early_init(void) { acpi_status status; if (acpi_disabled) return; pr_info("Core revision %08x\n", ACPI_CA_VERSION); /* enable workarounds, unless strict ACPI spec. compliance */ if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; acpi_permanent_mmap = true; #ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, * DSDT will be copied to memory. * Note that calling dmi_check_system() here on other architectures * would not be OK because only x86 initializes dmi early enough. * Thankfully only x86 systems need such quirks for now. */ dmi_check_system(dsdt_dmi_table); #endif status = acpi_reallocate_root_table(); if (ACPI_FAILURE(status)) { pr_err("Unable to reallocate ACPI tables\n"); goto error0; } status = acpi_initialize_subsystem(); if (ACPI_FAILURE(status)) { pr_err("Unable to initialize the ACPI Interpreter\n"); goto error0; } #ifdef CONFIG_X86 if (!acpi_ioapic) { /* compatible (0) means level (3) */ if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) { acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK; acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL; } /* Set PIC-mode SCI trigger type */ acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt, (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2); } else { /* * now that acpi_gbl_FADT is initialized, * update it with result from INT_SRC_OVR parsing */ acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi; } #endif return; error0: disable_acpi(); } /** * acpi_subsystem_init - Finalize the early initialization of ACPI. * * Switch over the platform to the ACPI mode (if possible). * * Doing this too early is generally unsafe, but at the same time it needs to be * done before all things that really depend on ACPI. The right spot appears to * be before finalizing the EFI initialization. */ void __init acpi_subsystem_init(void) { acpi_status status; if (acpi_disabled) return; status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { pr_err("Unable to enable ACPI\n"); disable_acpi(); } else { /* * If the system is using ACPI then we can be reasonably * confident that any regulators are managed by the firmware * so tell the regulator core it has everything it needs to * know. */ regulator_has_full_constraints(); } } static acpi_status acpi_bus_table_handler(u32 event, void *table, void *context) { if (event == ACPI_TABLE_EVENT_LOAD) acpi_scan_table_notify(); return acpi_sysfs_table_handler(event, table, context); } static int __init acpi_bus_init(void) { int result; acpi_status status; acpi_os_initialize1(); status = acpi_load_tables(); if (ACPI_FAILURE(status)) { pr_err("Unable to load the System Description Tables\n"); goto error1; } /* * ACPI 2.0 requires the EC driver to be loaded and work before the EC * device is found in the namespace. * * This is accomplished by looking for the ECDT table and getting the EC * parameters out of that. * * Do that before calling acpi_initialize_objects() which may trigger EC * address space accesses. */ acpi_ec_ecdt_probe(); status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { pr_err("Unable to start the ACPI Interpreter\n"); goto error1; } status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION); if (ACPI_FAILURE(status)) { pr_err("Unable to initialize ACPI objects\n"); goto error1; } /* * _OSC method may exist in module level code, * so it must be run after ACPI_FULL_INITIALIZATION */ acpi_bus_osc_negotiate_platform_control(); acpi_bus_osc_negotiate_usb_control(); /* * _PDC control method may load dynamic SSDT tables, * and we need to install the table handler before that. */ status = acpi_install_table_handler(acpi_bus_table_handler, NULL); acpi_sysfs_init(); acpi_early_processor_control_setup(); /* * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. */ acpi_ec_dsdt_probe(); pr_info("Interpreter enabled\n"); /* Initialize sleep structures */ acpi_sleep_init(); /* * Get the system interrupt model and evaluate \_PIC. */ result = acpi_bus_init_irq(); if (result) goto error1; /* * Register the for all standard device notifications. */ status = acpi_install_notify_handler(ACPI_ROOT_OBJECT, ACPI_SYSTEM_NOTIFY, &acpi_bus_notify, NULL); if (ACPI_FAILURE(status)) { pr_err("Unable to register for system notifications\n"); goto error1; } /* * Create the top ACPI proc directory */ acpi_root_dir = proc_mkdir(ACPI_BUS_FILE_ROOT, NULL); result = bus_register(&acpi_bus_type); if (!result) return 0; /* Mimic structured exception handling */ error1: acpi_terminate(); return -ENODEV; } struct kobject *acpi_kobj; EXPORT_SYMBOL_GPL(acpi_kobj); static int __init acpi_init(void) { int result; if (acpi_disabled) { pr_info("Interpreter disabled.\n"); return -ENODEV; } acpi_kobj = kobject_create_and_add("acpi", firmware_kobj); if (!acpi_kobj) pr_debug("%s: kset create error\n", __func__); init_prmt(); acpi_init_pcc(); result = acpi_bus_init(); if (result) { kobject_put(acpi_kobj); disable_acpi(); return result; } acpi_init_ffh(); pci_mmcfg_late_init(); acpi_viot_early_init(); acpi_hest_init(); acpi_ghes_init(); acpi_arm_init(); acpi_riscv_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); acpi_sleep_proc_init(); acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); acpi_viot_init(); return 0; } subsys_initcall(acpi_init);
23 36 18 7 17 2 11 15 26 23 37 36 5 10 10 7 10 10 3 7 2 3 2 2 96 96 103 96 1 23 2 14 5 12 12 2 3 5 7 7 7 100 100 100 10 4 1 6 1 6 1 6 4 19 4 17 14 7 23 17 20 21 3 20 3 4 15 3 7 21 3 8 1 9 10 10 22 13 13 3 3 13 5 2 5 5 4 4 17 17 17 5 15 17 17 13 6 39 39 1 8 32 39 15 15 1 11 31 11 27 21 29 12 29 13 35 5 39 7 6 2 8 5 16 16 7 16 23 14 21 89 4 58 26 24 3 55 4 4 14 38 36 16 65 10 34 40 55 21 26 15 17 17 19 40 34 6 1 2 2 22 8 5 3 2 26 5 7 3 3 3 6 4 3 1 2 1 3 3 16 12 12 12 25 22 3 16 3 3 4 3 1 4 4 4 2 4 5 7 7 3 3 5 3 8 7 7 9 8 8 9 9 9 9 9 6 3 6 6 11 1 1 1 3 3 4 3 7 7 5 1 1 5 6 1 5 2 5 6 17 14 3 2 9 3 3 8 6 19 5 16 6 6 1 6 3 3 3 3 3 2 2 2 43 43 1 4 80 80 3 3 11 3 14 72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2020, Red Hat, Inc. */ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/inet.h> #include <linux/kernel.h> #include <net/inet_common.h> #include <net/netns/generic.h> #include <net/mptcp.h> #include "protocol.h" #include "mib.h" #include "mptcp_pm_gen.h" static int pm_nl_pernet_id; struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; struct timer_list add_timer; struct mptcp_sock *sock; }; struct pm_nl_pernet { /* protects pernet updates */ spinlock_t lock; struct list_head local_addr_list; unsigned int addrs; unsigned int stale_loss_cnt; unsigned int add_addr_signal_max; unsigned int add_addr_accept_max; unsigned int local_addr_max; unsigned int subflows_max; unsigned int next_id; DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); }; #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) { return net_generic(net, pm_nl_pernet_id); } static struct pm_nl_pernet * pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) { return pm_nl_get_pernet(sock_net((struct sock *)msk)); } bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port) { bool addr_equals = false; if (a->family == b->family) { if (a->family == AF_INET) addr_equals = a->addr.s_addr == b->addr.s_addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); } else if (a->family == AF_INET) { if (ipv6_addr_v4mapped(&b->addr6)) addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; } else if (b->family == AF_INET) { if (ipv6_addr_v4mapped(&a->addr6)) addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; #endif } if (!addr_equals) return false; if (!use_port) return true; return a->port == b->port; } void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { addr->family = skc->skc_family; addr->port = htons(skc->skc_num); if (addr->family == AF_INET) addr->addr.s_addr = skc->skc_rcv_saddr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6) addr->addr6 = skc->skc_v6_rcv_saddr; #endif } static void remote_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { addr->family = skc->skc_family; addr->port = skc->skc_dport; if (addr->family == AF_INET) addr->addr.s_addr = skc->skc_daddr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6) addr->addr6 = skc->skc_v6_daddr; #endif } static bool lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr) { struct mptcp_subflow_context *subflow; struct mptcp_addr_info cur; struct sock_common *skc; list_for_each_entry(subflow, list, node) { skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); mptcp_local_address(skc, &cur); if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } return false; } static bool lookup_subflow_by_daddr(const struct list_head *list, const struct mptcp_addr_info *daddr) { struct mptcp_subflow_context *subflow; struct mptcp_addr_info cur; list_for_each_entry(subflow, list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) continue; remote_address((struct sock_common *)ssk, &cur); if (mptcp_addresses_equal(&cur, daddr, daddr->port)) return true; } return false; } static bool select_local_address(const struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; msk_owned_by_me(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; new_local->addr = entry->addr; new_local->flags = entry->flags; new_local->ifindex = entry->ifindex; found = true; break; } rcu_read_unlock(); return found; } static bool select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; rcu_read_lock(); /* do not keep any additional per socket state, just signal * the address list in order. * Note: removal from the local address list during the msk life-cycle * can lead to additional addresses not being announced. */ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; new_local->addr = entry->addr; new_local->flags = entry->flags; new_local->ifindex = entry->ifindex; found = true; break; } rcu_read_unlock(); return found; } unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) { const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); return READ_ONCE(pernet->add_addr_signal_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); return READ_ONCE(pernet->add_addr_accept_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); return READ_ONCE(pernet->subflows_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); return READ_ONCE(pernet->local_addr_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { WRITE_ONCE(msk->pm.work_pending, false); return false; } return true; } struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; lockdep_assert_held(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { if (mptcp_addresses_equal(&entry->addr, addr, true)) return entry; } return NULL; } bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) { struct mptcp_pm_add_entry *entry; struct mptcp_addr_info saddr; bool ret = false; mptcp_local_address((struct sock_common *)sk, &saddr); spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { ret = true; goto out; } } out: spin_unlock_bh(&msk->pm.lock); return ret; } static void mptcp_pm_add_timer(struct timer_list *timer) { struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; pr_debug("msk=%p\n", msk); if (!msk) return; if (inet_sk_state_load(sk) == TCP_CLOSE) return; if (!entry->addr.id) return; if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; } spin_lock_bh(&msk->pm.lock); if (!mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) sk_reset_timer(sk, timer, jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); spin_unlock_bh(&msk->pm.lock); if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) mptcp_pm_subflow_established(msk); out: __sock_put(sk); } struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id) { struct mptcp_pm_add_entry *entry; struct sock *sk = (struct sock *)msk; struct timer_list *add_timer = NULL; spin_lock_bh(&msk->pm.lock); entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (entry && (!check_id || entry->addr.id == addr->id)) { entry->retrans_times = ADD_ADDR_RETRANS_MAX; add_timer = &entry->add_timer; } if (!check_id && entry) list_del(&entry->list); spin_unlock_bh(&msk->pm.lock); /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ if (add_timer) sk_stop_timer_sync(sk, add_timer); return entry; } bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); lockdep_assert_held(&msk->pm.lock); add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, jiffies + mptcp_get_add_addr_timeout(net)); return true; } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); if (!add_entry) return false; list_add(&add_entry->list, &msk->pm.anno_list); add_entry->addr = *addr; add_entry->sock = msk; add_entry->retrans_times = 0; timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); sk_reset_timer(sk, &add_entry->add_timer, jiffies + mptcp_get_add_addr_timeout(net)); return true; } void mptcp_pm_free_anno_list(struct mptcp_sock *msk) { struct mptcp_pm_add_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); pr_debug("msk=%p\n", msk); spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.anno_list, &free_list); spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { sk_stop_timer_sync(sk, &entry->add_timer); kfree(entry); } } /* Fill all the remote addresses into the array addrs[], * and return the array size. */ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *local, bool fullmesh, struct mptcp_addr_info *addrs) { bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); struct sock *sk = (struct sock *)msk, *ssk; struct mptcp_subflow_context *subflow; struct mptcp_addr_info remote = { 0 }; unsigned int subflows_max; int i = 0; subflows_max = mptcp_pm_get_subflows_max(msk); remote_address((struct sock_common *)sk, &remote); /* Non-fullmesh endpoint, fill in the single entry * corresponding to the primary MPC subflow remote address */ if (!fullmesh) { if (deny_id0) return 0; if (!mptcp_pm_addr_families_match(sk, local, &remote)) return 0; msk->pm.subflows++; addrs[i++] = remote; } else { DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); /* Forbid creation of new subflows matching existing * ones, possibly already created by incoming ADD_ADDR */ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); mptcp_for_each_subflow(msk, subflow) if (READ_ONCE(subflow->local_id) == local->id) __set_bit(subflow->remote_id, unavail_id); mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; if (test_bit(addrs[i].id, unavail_id)) continue; if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) continue; if (msk->pm.subflows < subflows_max) { /* forbid creating multiple address towards * this id */ __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } } } return i; } static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, bool prio, bool backup) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; pr_debug("send ack for %s\n", prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); slow = lock_sock_fast(ssk); if (prio) { subflow->send_mp_prio = 1; subflow->request_bkup = backup; } __mptcp_subflow_send_ack(ssk); unlock_sock_fast(ssk, slow); } static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, bool prio, bool backup) { spin_unlock_bh(&msk->pm.lock); __mptcp_pm_send_ack(msk, subflow, prio, backup); spin_lock_bh(&msk->pm.lock); } static struct mptcp_pm_addr_entry * __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) { struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { if (entry->addr.id == id) return entry; } return NULL; } static struct mptcp_pm_addr_entry * __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) { struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) return entry; } return NULL; } static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; unsigned int add_addr_signal_max; bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; struct mptcp_pm_local local; unsigned int subflows_max; pernet = pm_nl_get_pernet(sock_net(sk)); add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); local_addr_max = mptcp_pm_get_local_addr_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); /* do lazy endpoint usage accounting for the MPC subflows */ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); struct mptcp_pm_addr_entry *entry; struct mptcp_addr_info mpc_addr; bool backup = false; mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); entry = __lookup_addr(pernet, &mpc_addr); if (entry) { __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); msk->mpc_endpoint_id = entry->addr.id; backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); } rcu_read_unlock(); if (backup) mptcp_pm_send_ack(msk, subflow, true, backup); msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); } pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", msk->pm.local_addr_used, local_addr_max, msk->pm.add_addr_signaled, add_addr_signal_max, msk->pm.subflows, subflows_max); /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the * corresponding id will be marked as used. * Instead let the PM machinery reschedule us when the * current address announce will be completed. */ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; if (!select_signal_address(pernet, msk, &local)) goto subflow; /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; /* Special case for ID0: set the correct ID */ if (local.addr.id == msk->mpc_endpoint_id) local.addr.id = 0; mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) signal_and_subflow = true; } subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; bool fullmesh; int i, nr; if (signal_and_subflow) signal_and_subflow = false; else if (!select_local_address(pernet, msk, &local)) break; fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); /* Special case for ID0: set the correct ID */ if (local.addr.id == msk->mpc_endpoint_id) local.addr.id = 0; else /* local_addr_used is not decr for ID 0 */ msk->pm.local_addr_used++; nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) __mptcp_subflow_connect(sk, &local, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); } static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } /* Fill all the local addresses into the array addrs[], * and return the array size. */ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, struct mptcp_pm_local *locals) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; struct mptcp_addr_info mpc_addr; struct pm_nl_pernet *pernet; unsigned int subflows_max; int i = 0; pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); mptcp_local_address((struct sock_common *)msk, &mpc_addr); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) continue; if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) continue; if (msk->pm.subflows < subflows_max) { locals[i].addr = entry->addr; locals[i].flags = entry->flags; locals[i].ifindex = entry->ifindex; /* Special case for ID0: set the correct ID */ if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) locals[i].addr.id = 0; msk->pm.subflows++; i++; } } rcu_read_unlock(); /* If the array is empty, fill in the single * 'IPADDRANY' local address */ if (!i) { memset(&locals[i], 0, sizeof(locals[i])); locals[i].addr.family = #if IS_ENABLED(CONFIG_MPTCP_IPV6) remote->family == AF_INET6 && ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : #endif remote->family; if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) return 0; msk->pm.subflows++; i++; } return i; } static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; bool sf_created = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); pr_debug("accepted %d:%d remote family %d\n", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); remote = msk->pm.remote; mptcp_pm_announce_addr(msk, &remote, true); mptcp_pm_nl_addr_send_ack(msk); if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) return; /* pick id 0 port, if none is provided the remote address */ if (!remote.port) remote.port = sk->sk_dport; /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ nr = fill_local_addresses_vec(msk, &remote, locals); if (nr == 0) return; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) sf_created = true; spin_lock_bh(&msk->pm.lock); if (sf_created) { /* add_addr_accepted is not decr for ID 0 */ if (remote.id) msk->pm.add_addr_accepted++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || msk->pm.subflows >= subflows_max) WRITE_ONCE(msk->pm.accept_addr, false); } } bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *remote) { struct mptcp_addr_info mpc_remote; remote_address((struct sock_common *)msk, &mpc_remote); return mptcp_addresses_equal(&mpc_remote, remote, remote->port); } void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); if (!mptcp_pm_should_add_signal(msk) && !mptcp_pm_should_rm_signal(msk)) return; mptcp_for_each_subflow(msk, subflow) { if (__mptcp_subflow_active(subflow)) { mptcp_pm_send_ack(msk, subflow, false, false); break; } } } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, struct mptcp_addr_info *rem, u8 bkup) { struct mptcp_subflow_context *subflow; pr_debug("bkup=%d\n", bkup); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; if (rem && rem->family != AF_UNSPEC) { remote_address((struct sock_common *)ssk, &remote); if (!mptcp_addresses_equal(&remote, rem, rem->port)) continue; } __mptcp_pm_send_ack(msk, subflow, true, bkup); return 0; } return -EINVAL; } static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list, enum linux_mptcp_mib_field rm_type) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; u8 i; pr_debug("%s rm_list_nr %d\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); msk_owned_by_me(msk); if (sk->sk_state == TCP_LISTEN) return; if (!rm_list->nr) return; if (list_empty(&msk->conn_list)) return; for (i = 0; i < rm_list->nr; i++) { u8 rm_id = rm_list->ids[i]; bool removed = false; mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); if (inet_sk_state_load(ssk) == TCP_CLOSE) continue; if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); /* the following takes care of updating the subflows counter */ mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMADDR) __MPTCP_INC_STATS(sock_net(sk), rm_type); if (!removed) continue; if (!mptcp_pm_is_kernel(msk)) continue; if (rm_type == MPTCP_MIB_RMADDR && rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { /* Note: if the subflow has been closed before, this * add_addr_accepted counter will not be decremented. */ if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) WRITE_ONCE(msk->pm.accept_addr, true); } } } static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) { mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) { mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } void mptcp_pm_nl_work(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; msk_owned_by_me(msk); if (!(pm->status & MPTCP_PM_WORK_MASK)) return; spin_lock_bh(&msk->pm.lock); pr_debug("msk=%p status=%x\n", msk, pm->status); if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); } if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); mptcp_pm_nl_addr_send_ack(msk); } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); mptcp_pm_nl_rm_addr_received(msk); } if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); mptcp_pm_nl_fully_established(msk); } if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); mptcp_pm_nl_subflow_established(msk); } spin_unlock_bh(&msk->pm.lock); } static bool address_use_port(struct mptcp_pm_addr_entry *entry) { return (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == MPTCP_PM_ADDR_FLAG_SIGNAL; } /* caller must ensure the RCU grace period is already elapsed */ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) { if (entry->lsk) sock_release(entry->lsk); kfree(entry); } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; int ret = -EINVAL; spin_lock_bh(&pernet->lock); /* to keep the code simple, don't do IDR-like allocation for address ID, * just bail when we exceed limits */ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) pernet->next_id = 1; if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { ret = -ERANGE; goto out; } if (test_bit(entry->addr.id, pernet->id_bitmap)) { ret = -EBUSY; goto out; } /* do not insert duplicate address, differentiate on port only * singled addresses */ if (!address_use_port(entry)) entry->addr.port = 0; list_for_each_entry(cur, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&cur->addr, &entry->addr, cur->addr.port || entry->addr.port)) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id */ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { ret = -EEXIST; goto out; } if (entry->addr.id) goto out; pernet->addrs--; entry->addr.id = cur->addr.id; list_del_rcu(&cur->list); del_entry = cur; break; } } if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, pernet->next_id); if (!entry->addr.id && pernet->next_id != 1) { pernet->next_id = 1; goto find_next; } } if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); if (entry->addr.id > pernet->next_id) pernet->next_id = entry->addr.id; if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); } if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max + 1); } pernet->addrs++; if (!entry->addr.port) list_add_tail_rcu(&entry->list, &pernet->local_addr_list); else list_add_rcu(&entry->list, &pernet->local_addr_list); ret = entry->addr.id; out: spin_unlock_bh(&pernet->lock); /* just replaced an existing entry, free it */ if (del_entry) { synchronize_rcu(); __mptcp_pm_release_addr_entry(del_entry); } return ret; } static struct lock_class_key mptcp_slock_keys[2]; static struct lock_class_key mptcp_keys[2]; static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { bool is_ipv6 = sk->sk_family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct sock *newsk, *ssk; int backlog = 1024; int err; err = sock_create_kern(sock_net(sk), entry->addr.family, SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); if (err) return err; newsk = entry->lsk->sk; if (!newsk) return -EINVAL; /* The subflow socket lock is acquired in a nested to the msk one * in several places, even by the TCP stack, and this msk is a kernel * socket: lockdep complains. Instead of propagating the _nested * modifiers in several places, re-init the lock class for the msk * socket to an mptcp specific one. */ sock_lock_init_class_and_name(newsk, is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", &mptcp_slock_keys[is_ipv6], is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", &mptcp_keys[is_ipv6]); lock_sock(newsk); ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); release_sock(newsk); if (IS_ERR(ssk)) return PTR_ERR(ssk); mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (entry->addr.family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif if (ssk->sk_family == AF_INET) err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (ssk->sk_family == AF_INET6) err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); #endif if (err) return err; /* We don't use mptcp_set_state() here because it needs to be called * under the msk socket lock. For the moment, that will not bring * anything more than only calling inet_sk_state_store(), because the * old status is known (TCP_CLOSE). */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); if (!err) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); release_sock(ssk); return err; } int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; int ret = -1; pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { ret = entry->addr.id; break; } } rcu_read_unlock(); if (ret >= 0) return ret; /* address not found, add to local list */ entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; entry->addr = *skc; entry->addr.id = 0; entry->addr.port = 0; entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); return ret; } bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); struct mptcp_pm_addr_entry *entry; bool backup = false; rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); break; } } rcu_read_unlock(); return backup; } #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; unsigned int active_max_loss_cnt; struct net *net = sock_net(sk); unsigned int stale_loss_cnt; bool slow; stale_loss_cnt = mptcp_stale_loss_cnt(net); if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) return; /* look for another available subflow not in loss state */ active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); mptcp_for_each_subflow(msk, iter) { if (iter != subflow && mptcp_subflow_active(iter) && iter->stale_count < active_max_loss_cnt) { /* we have some alternatives, try to mark this subflow as idle ...*/ slow = lock_sock_fast(ssk); if (!tcp_rtx_and_write_queues_empty(ssk)) { subflow->stale = 1; __mptcp_retransmit_pending_data(sk); MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); } unlock_sock_fast(ssk, slow); /* always try to push the pending data regardless of re-injections: * we can possibly use backup subflows now, and subflow selection * is cheap under the msk socket lock */ __mptcp_push_pending(sk, 0); return; } } } static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (family == AF_INET6) return MPTCP_PM_ADDR_ATTR_ADDR6; #endif return MPTCP_PM_ADDR_ATTR_ADDR4; } static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], const struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr, bool require_family) { int err, addr_addr; if (!attr) { GENL_SET_ERR_MSG(info, "missing address info"); return -EINVAL; } /* no validation needed - was already done via nested policy */ err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, mptcp_pm_address_nl_policy, info->extack); if (err) return err; if (tb[MPTCP_PM_ADDR_ATTR_ID]) addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { if (!require_family) return 0; NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing family"); return -EINVAL; } addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); if (addr->family != AF_INET #if IS_ENABLED(CONFIG_MPTCP_IPV6) && addr->family != AF_INET6 #endif ) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "unknown address family"); return -EINVAL; } addr_addr = mptcp_pm_family_to_addr(addr->family); if (!tb[addr_addr]) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing address data"); return -EINVAL; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr->family == AF_INET6) addr->addr6 = nla_get_in6_addr(tb[addr_addr]); else #endif addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); return 0; } int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; memset(addr, 0, sizeof(*addr)); return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); } int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; int err; memset(entry, 0, sizeof(*entry)); err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); if (err) return err; if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); entry->ifindex = val; } if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); return 0; } static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) { return pm_nl_get_pernet(genl_info_net(info)); } static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, struct mptcp_addr_info *addr) { struct mptcp_sock *msk; long s_slot = 0, s_num = 0; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; struct mptcp_addr_info mpc_addr; if (!READ_ONCE(msk->fully_established) || mptcp_pm_is_userspace(msk)) goto next; /* if the endp linked to the init sf is re-added with a != ID */ mptcp_local_address((struct sock_common *)msk, &mpc_addr); lock_sock(sk); spin_lock_bh(&msk->pm.lock); if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) msk->mpc_endpoint_id = addr->id; mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); release_sock(sk); next: sock_put(sk); cond_resched(); } return 0; } static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, struct genl_info *info) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, mptcp_pm_address_nl_policy, info->extack) && tb[MPTCP_PM_ADDR_ATTR_ID]) return true; return false; } int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; int ret; ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret; if (addr.addr.port && !address_use_port(&addr)) { GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); return -EINVAL; } entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); return -ENOMEM; } *entry = addr; if (entry->addr.port) { ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); if (ret) { GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); goto out_free; } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; } mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); return 0; out_free: __mptcp_pm_release_addr_entry(entry); return ret; } static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; entry = mptcp_pm_del_add_timer(msk, addr, false); if (entry) { kfree(entry); return true; } return false; } static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; } static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool force) { struct mptcp_rm_list list = { .nr = 0 }; bool ret; list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); if (ret) { __set_bit(addr->id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled--; } mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } return ret; } static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) { /* If it was marked as used, and not ID 0, decrement local_addr_used */ if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) msk->pm.local_addr_used--; } static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { const struct mptcp_addr_info *addr = &entry->addr; struct mptcp_rm_list list = { .nr = 1 }; long s_slot = 0, s_num = 0; struct mptcp_sock *msk; pr_debug("remove_id=%d\n", addr->id); while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; bool remove_subflow; if (mptcp_pm_is_userspace(msk)) goto next; if (list_empty(&msk->conn_list)) { mptcp_pm_remove_anno_addr(msk, addr, false); goto next; } lock_sock(sk); remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); list.ids[0] = mptcp_endp_get_local_id(msk, addr); if (remove_subflow) { spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); spin_unlock_bh(&msk->pm.lock); } if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { spin_lock_bh(&msk->pm.lock); __mark_subflow_endp_available(msk, list.ids[0]); spin_unlock_bh(&msk->pm.lock); } if (msk->mpc_endpoint_id == entry->addr.id) msk->mpc_endpoint_id = 0; release_sock(sk); next: sock_put(sk); cond_resched(); } return 0; } static int mptcp_nl_remove_id_zero_address(struct net *net, struct mptcp_addr_info *addr) { struct mptcp_rm_list list = { .nr = 0 }; long s_slot = 0, s_num = 0; struct mptcp_sock *msk; list.ids[list.nr++] = 0; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; struct mptcp_addr_info msk_local; if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; mptcp_local_address((struct sock_common *)msk, &msk_local); if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; lock_sock(sk); spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); mptcp_pm_nl_rm_subflow_received(msk, &list); __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); next: sock_put(sk); cond_resched(); } return 0; } int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsigned int addr_max; int ret; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; /* the zero id address is special: the first address used by the msk * always gets such an id, so different subflows can have different zero * id addresses. Additionally zero id is not accounted for in id_bitmap. * Let's use an 'mptcp_rm_list' instead of the common remove code. */ if (addr.addr.id == 0) return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { GENL_SET_ERR_MSG(info, "address not found"); spin_unlock_bh(&pernet->lock); return -EINVAL; } if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { addr_max = pernet->add_addr_signal_max; WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); } if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { addr_max = pernet->local_addr_max; WRITE_ONCE(pernet->local_addr_max, addr_max - 1); } pernet->addrs--; list_del_rcu(&entry->list); __clear_bit(entry->addr.id, pernet->id_bitmap); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); synchronize_rcu(); __mptcp_pm_release_addr_entry(entry); return ret; } /* Called from the userspace PM only */ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { if (alist.nr >= MPTCP_RM_IDS_MAX) break; /* only delete if either announced or matching a subflow */ if (remove_anno_list_by_saddr(msk, &entry->addr)) anno_nr++; else if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) continue; alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } } /* Called from the in-kernel PM only */ static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { if (slist.nr < MPTCP_RM_IDS_MAX && lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); if (alist.nr < MPTCP_RM_IDS_MAX && remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); } spin_lock_bh(&msk->pm.lock); if (alist.nr) { msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); } if (slist.nr) mptcp_pm_nl_rm_subflow_received(msk, &slist); /* Reset counters: maybe some subflows have been removed before */ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); msk->pm.local_addr_used = 0; spin_unlock_bh(&msk->pm.lock); } static void mptcp_nl_flush_addrs_list(struct net *net, struct list_head *rm_list) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; if (list_empty(rm_list)) return; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; if (!mptcp_pm_is_userspace(msk)) { lock_sock(sk); mptcp_pm_flush_addrs_and_subflows(msk, rm_list); release_sock(sk); } sock_put(sk); cond_resched(); } } /* caller must ensure the RCU grace period is already elapsed */ static void __flush_addrs(struct list_head *list) { while (!list_empty(list)) { struct mptcp_pm_addr_entry *cur; cur = list_entry(list->next, struct mptcp_pm_addr_entry, list); list_del_rcu(&cur->list); __mptcp_pm_release_addr_entry(cur); } } static void __reset_counters(struct pm_nl_pernet *pernet) { WRITE_ONCE(pernet->add_addr_signal_max, 0); WRITE_ONCE(pernet->add_addr_accept_max, 0); WRITE_ONCE(pernet->local_addr_max, 0); pernet->addrs = 0; } int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); LIST_HEAD(free_list); spin_lock_bh(&pernet->lock); list_splice_init(&pernet->local_addr_list, &free_list); __reset_counters(pernet); pernet->next_id = 1; bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); synchronize_rcu(); __flush_addrs(&free_list); return 0; } int mptcp_nl_fill_addr(struct sk_buff *skb, struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; attr = nla_nest_start(skb, MPTCP_PM_ATTR_ADDR); if (!attr) return -EMSGSIZE; if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family)) goto nla_put_failure; if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) goto nla_put_failure; if (entry->ifindex && nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; if (addr->family == AF_INET && nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, addr->addr.s_addr)) goto nla_put_failure; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6 && nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) goto nla_put_failure; #endif nla_nest_end(skb, attr); return 0; nla_put_failure: nla_nest_cancel(skb, attr); return -EMSGSIZE; } int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; struct sk_buff *msg; void *reply; int ret; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, info->genlhdr->cmd); if (!reply) { GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); ret = -EMSGSIZE; goto fail; } spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { GENL_SET_ERR_MSG(info, "address not found"); ret = -EINVAL; goto unlock_fail; } ret = mptcp_nl_fill_addr(msg, entry); if (ret) goto unlock_fail; genlmsg_end(msg, reply); ret = genlmsg_reply(msg, info); spin_unlock_bh(&pernet->lock); return ret; unlock_fail: spin_unlock_bh(&pernet->lock); fail: nlmsg_free(msg); return ret; } int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { return mptcp_pm_get_addr(skb, info); } int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; int id = cb->args[0]; void *hdr; int i; pernet = pm_nl_get_pernet(net); spin_lock_bh(&pernet->lock); for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { if (test_bit(i, pernet->id_bitmap)) { entry = __lookup_addr_by_id(pernet, i); if (!entry) break; if (entry->addr.id <= id) continue; hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &mptcp_genl_family, NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); if (!hdr) break; if (mptcp_nl_fill_addr(msg, entry) < 0) { genlmsg_cancel(msg, hdr); break; } id = entry->addr.id; genlmsg_end(msg, hdr); } } spin_unlock_bh(&pernet->lock); cb->args[0] = id; return msg->len; } int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return mptcp_pm_dump_addr(msg, cb); } static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; if (!attr) return 0; *limit = nla_get_u32(attr); if (*limit > MPTCP_PM_ADDR_MAX) { GENL_SET_ERR_MSG(info, "limit greater than maximum"); return -EINVAL; } return 0; } int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); unsigned int rcv_addrs, subflows; int ret; spin_lock_bh(&pernet->lock); rcv_addrs = pernet->add_addr_accept_max; ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); if (ret) goto unlock; subflows = pernet->subflows_max; ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); if (ret) goto unlock; WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); WRITE_ONCE(pernet->subflows_max, subflows); unlock: spin_unlock_bh(&pernet->lock); return ret; } int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct sk_buff *msg; void *reply; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, MPTCP_PM_CMD_GET_LIMITS); if (!reply) goto fail; if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, READ_ONCE(pernet->add_addr_accept_max))) goto fail; if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, READ_ONCE(pernet->subflows_max))) goto fail; genlmsg_end(msg, reply); return genlmsg_reply(msg, info); fail: GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); nlmsg_free(msg); return -EMSGSIZE; } static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, struct mptcp_addr_info *addr) { struct mptcp_rm_list list = { .nr = 0 }; list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); } static int mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr, u8 bkup, u8 changed) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; int ret = -EINVAL; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) mptcp_pm_nl_fullmesh(msk, addr); release_sock(sk); next: sock_put(sk); cond_resched(); } return ret; } int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; u8 bkup = 0; int ret; pernet = pm_nl_get_pernet(net); ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; if (addr.addr.family == AF_UNSPEC) { lookup_by_id = 1; if (!addr.addr.id) { GENL_SET_ERR_MSG(info, "missing required inputs"); return -EOPNOTSUPP; } } if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&pernet->lock); entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) : __lookup_addr(pernet, &addr.addr); if (!entry) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "address not found"); return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; } changed = (addr.flags ^ entry->flags) & mask; entry->flags = (entry->flags & ~mask) | (addr.flags & mask); addr = *entry; spin_unlock_bh(&pernet->lock); mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { return mptcp_pm_set_flags(skb, info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) { genlmsg_multicast_netns(&mptcp_genl_family, net, nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); } bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) { return genl_has_listeners(&mptcp_genl_family, sock_net((const struct sock *)msk), MPTCP_PM_EV_GRP_OFFSET); } static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) { const struct inet_sock *issk = inet_sk(ssk); const struct mptcp_subflow_context *sf; if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) return -EMSGSIZE; switch (ssk->sk_family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) return -EMSGSIZE; if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) return -EMSGSIZE; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { const struct ipv6_pinfo *np = inet6_sk(ssk); if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) return -EMSGSIZE; if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) return -EMSGSIZE; break; } #endif default: WARN_ON_ONCE(1); return -EMSGSIZE; } if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) return -EMSGSIZE; if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (WARN_ON_ONCE(!sf)) return -EINVAL; if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) return -EMSGSIZE; return 0; } static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { const struct sock *sk = (const struct sock *)msk; const struct mptcp_subflow_context *sf; u8 sk_err; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) return -EMSGSIZE; if (mptcp_event_add_subflow(skb, ssk)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (WARN_ON_ONCE(!sf)) return -EINVAL; if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) return -EMSGSIZE; if (ssk->sk_bound_dev_if && nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; return 0; } static int mptcp_event_sub_established(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { return mptcp_event_put_token_and_ssk(skb, msk, ssk); } static int mptcp_event_sub_closed(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { const struct mptcp_subflow_context *sf; if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (!sf->reset_seen) return 0; if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason)) return -EMSGSIZE; if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient)) return -EMSGSIZE; return 0; } static int mptcp_event_created(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)); if (err) return err; if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) return -EMSGSIZE; return mptcp_event_add_subflow(skb, ssk); } void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) { struct net *net = sock_net((const struct sock *)msk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); if (!nlh) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) goto nla_put_failure; genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_ANNOUNCED); if (!nlh) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) goto nla_put_failure; if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port == 0 ? inet_sk(ssk)->inet_dport : info->port)) goto nla_put_failure; switch (info->family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) goto nla_put_failure; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) goto nla_put_failure; break; #endif default: WARN_ON_ONCE(1); goto nla_put_failure; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event) { const struct inet_sock *issk = inet_sk(ssk); struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event); if (!nlh) goto nla_put_failure; if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) goto nla_put_failure; if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) goto nla_put_failure; switch (ssk->sk_family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) goto nla_put_failure; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { const struct ipv6_pinfo *np = inet6_sk(ssk); if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) goto nla_put_failure; break; } #endif default: WARN_ON_ONCE(1); goto nla_put_failure; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_KERNEL); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { struct net *net = sock_net((const struct sock *)msk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); if (!nlh) goto nla_put_failure; switch (type) { case MPTCP_EVENT_UNSPEC: WARN_ON_ONCE(1); break; case MPTCP_EVENT_CREATED: case MPTCP_EVENT_ESTABLISHED: if (mptcp_event_created(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_CLOSED: if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0) goto nla_put_failure; break; case MPTCP_EVENT_ANNOUNCED: case MPTCP_EVENT_REMOVED: /* call mptcp_event_addr_announced()/removed instead */ WARN_ON_ONCE(1); break; case MPTCP_EVENT_SUB_ESTABLISHED: case MPTCP_EVENT_SUB_PRIORITY: if (mptcp_event_sub_established(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_SUB_CLOSED: if (mptcp_event_sub_closed(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_LISTENER_CREATED: case MPTCP_EVENT_LISTENER_CLOSED: break; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, gfp); return; nla_put_failure: nlmsg_free(skb); } struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, .netnsok = true, .module = THIS_MODULE, .ops = mptcp_pm_nl_ops, .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops), .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; static int __net_init pm_nl_init_net(struct net *net) { struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); INIT_LIST_HEAD_RCU(&pernet->local_addr_list); /* Cit. 2 subflows ought to be enough for anybody. */ pernet->subflows_max = 2; pernet->next_id = 1; pernet->stale_loss_cnt = 4; spin_lock_init(&pernet->lock); /* No need to initialize other pernet fields, the struct is zeroed at * allocation time. */ return 0; } static void __net_exit pm_nl_exit_net(struct list_head *net_list) { struct net *net; list_for_each_entry(net, net_list, exit_list) { struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); /* net is removed from namespace list, can't race with * other modifiers, also netns core already waited for a * RCU grace period. */ __flush_addrs(&pernet->local_addr_list); } } static struct pernet_operations mptcp_pm_pernet_ops = { .init = pm_nl_init_net, .exit_batch = pm_nl_exit_net, .id = &pm_nl_pernet_id, .size = sizeof(struct pm_nl_pernet), }; void __init mptcp_pm_nl_init(void) { if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) panic("Failed to register MPTCP PM pernet subsystem.\n"); if (genl_register_family(&mptcp_genl_family)) panic("Failed to register MPTCP PM netlink family\n"); }
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SCSI_SCSI_HOST_H #define _SCSI_SCSI_HOST_H #include <linux/device.h> #include <linux/list.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/blk-mq.h> #include <scsi/scsi.h> struct block_device; struct completion; struct module; struct scsi_cmnd; struct scsi_device; struct scsi_target; struct Scsi_Host; struct scsi_transport_template; #define SG_ALL SG_CHUNK_SIZE #define MODE_UNKNOWN 0x00 #define MODE_INITIATOR 0x01 #define MODE_TARGET 0x02 /** * enum scsi_timeout_action - How to handle a command that timed out. * @SCSI_EH_DONE: The command has already been completed. * @SCSI_EH_RESET_TIMER: Reset the timer and continue waiting for completion. * @SCSI_EH_NOT_HANDLED: The command has not yet finished. Abort the command. */ enum scsi_timeout_action { SCSI_EH_DONE, SCSI_EH_RESET_TIMER, SCSI_EH_NOT_HANDLED, }; struct scsi_host_template { /* * Put fields referenced in IO submission path together in * same cacheline */ /* * Additional per-command data allocated for the driver. */ unsigned int cmd_size; /* * The queuecommand function is used to queue up a scsi * command block to the LLDD. When the driver finished * processing the command the done callback is invoked. * * If queuecommand returns 0, then the driver has accepted the * command. It must also push it to the HBA if the scsi_cmnd * flag SCMD_LAST is set, or if the driver does not implement * commit_rqs. The done() function must be called on the command * when the driver has finished with it. (you may call done on the * command before queuecommand returns, but in this case you * *must* return 0 from queuecommand). * * Queuecommand may also reject the command, in which case it may * not touch the command and must not call done() for it. * * There are two possible rejection returns: * * SCSI_MLQUEUE_DEVICE_BUSY: Block this device temporarily, but * allow commands to other devices serviced by this host. * * SCSI_MLQUEUE_HOST_BUSY: Block all devices served by this * host temporarily. * * For compatibility, any other non-zero return is treated the * same as SCSI_MLQUEUE_HOST_BUSY. * * NOTE: "temporarily" means either until the next command for# * this device/host completes, or a period of time determined by * I/O pressure in the system if there are no other outstanding * commands. * * STATUS: REQUIRED */ int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *); /* * The commit_rqs function is used to trigger a hardware * doorbell after some requests have been queued with * queuecommand, when an error is encountered before sending * the request with SCMD_LAST set. * * STATUS: OPTIONAL */ void (*commit_rqs)(struct Scsi_Host *, u16); struct module *module; const char *name; /* * The info function will return whatever useful information the * developer sees fit. If not provided, then the name field will * be used instead. * * Status: OPTIONAL */ const char *(*info)(struct Scsi_Host *); /* * Ioctl interface * * Status: OPTIONAL */ int (*ioctl)(struct scsi_device *dev, unsigned int cmd, void __user *arg); #ifdef CONFIG_COMPAT /* * Compat handler. Handle 32bit ABI. * When unknown ioctl is passed return -ENOIOCTLCMD. * * Status: OPTIONAL */ int (*compat_ioctl)(struct scsi_device *dev, unsigned int cmd, void __user *arg); #endif int (*init_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd); int (*exit_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd); /* * This is an error handling strategy routine. You don't need to * define one of these if you don't want to - there is a default * routine that is present that should work in most cases. For those * driver authors that have the inclination and ability to write their * own strategy routine, this is where it is specified. Note - the * strategy routine is *ALWAYS* run in the context of the kernel eh * thread. Thus you are guaranteed to *NOT* be in an interrupt * handler when you execute this, and you are also guaranteed to * *NOT* have any other commands being queued while you are in the * strategy routine. When you return from this function, operations * return to normal. * * See scsi_error.c scsi_unjam_host for additional comments about * what this function should and should not be attempting to do. * * Status: REQUIRED (at least one of them) */ int (* eh_abort_handler)(struct scsi_cmnd *); int (* eh_device_reset_handler)(struct scsi_cmnd *); int (* eh_target_reset_handler)(struct scsi_cmnd *); int (* eh_bus_reset_handler)(struct scsi_cmnd *); int (* eh_host_reset_handler)(struct scsi_cmnd *); /* * Before the mid layer attempts to scan for a new device where none * currently exists, it will call this entry in your driver. Should * your driver need to allocate any structs or perform any other init * items in order to send commands to a currently unused target/lun * combo, then this is where you can perform those allocations. This * is specifically so that drivers won't have to perform any kind of * "is this a new device" checks in their queuecommand routine, * thereby making the hot path a bit quicker. * * Return values: 0 on success, non-0 on failure * * Deallocation: If we didn't find any devices at this ID, you will * get an immediate call to slave_destroy(). If we find something * here then you will get a call to slave_configure(), then the * device will be used for however long it is kept around, then when * the device is removed from the system (or * possibly at reboot * time), you will then get a call to slave_destroy(). This is * assuming you implement slave_configure and slave_destroy. * However, if you allocate memory and hang it off the device struct, * then you must implement the slave_destroy() routine at a minimum * in order to avoid leaking memory * each time a device is tore down. * * Status: OPTIONAL */ int (* slave_alloc)(struct scsi_device *); /* * Once the device has responded to an INQUIRY and we know the * device is online, we call into the low level driver with the * struct scsi_device *. If the low level device driver implements * this function, it *must* perform the task of setting the queue * depth on the device. All other tasks are optional and depend * on what the driver supports and various implementation details. * * Things currently recommended to be handled at this time include: * * 1. Setting the device queue depth. Proper setting of this is * described in the comments for scsi_change_queue_depth. * 2. Determining if the device supports the various synchronous * negotiation protocols. The device struct will already have * responded to INQUIRY and the results of the standard items * will have been shoved into the various device flag bits, eg. * device->sdtr will be true if the device supports SDTR messages. * 3. Allocating command structs that the device will need. * 4. Setting the default timeout on this device (if needed). * 5. Anything else the low level driver might want to do on a device * specific setup basis... * 6. Return 0 on success, non-0 on error. The device will be marked * as offline on error so that no access will occur. If you return * non-0, your slave_destroy routine will never get called for this * device, so don't leave any loose memory hanging around, clean * up after yourself before returning non-0 * * Status: OPTIONAL * * Note: slave_configure is the legacy version, use device_configure for * all new code. A driver must never define both. */ int (* device_configure)(struct scsi_device *, struct queue_limits *lim); int (* slave_configure)(struct scsi_device *); /* * Immediately prior to deallocating the device and after all activity * has ceased the mid layer calls this point so that the low level * driver may completely detach itself from the scsi device and vice * versa. The low level driver is responsible for freeing any memory * it allocated in the slave_alloc or slave_configure calls. * * Status: OPTIONAL */ void (* slave_destroy)(struct scsi_device *); /* * Before the mid layer attempts to scan for a new device attached * to a target where no target currently exists, it will call this * entry in your driver. Should your driver need to allocate any * structs or perform any other init items in order to send commands * to a currently unused target, then this is where you can perform * those allocations. * * Return values: 0 on success, non-0 on failure * * Status: OPTIONAL */ int (* target_alloc)(struct scsi_target *); /* * Immediately prior to deallocating the target structure, and * after all activity to attached scsi devices has ceased, the * midlayer calls this point so that the driver may deallocate * and terminate any references to the target. * * Note: This callback is called with the host lock held and hence * must not sleep. * * Status: OPTIONAL */ void (* target_destroy)(struct scsi_target *); /* * If a host has the ability to discover targets on its own instead * of scanning the entire bus, it can fill in this function and * call scsi_scan_host(). This function will be called periodically * until it returns 1 with the scsi_host and the elapsed time of * the scan in jiffies. * * Status: OPTIONAL */ int (* scan_finished)(struct Scsi_Host *, unsigned long); /* * If the host wants to be called before the scan starts, but * after the midlayer has set up ready for the scan, it can fill * in this function. * * Status: OPTIONAL */ void (* scan_start)(struct Scsi_Host *); /* * Fill in this function to allow the queue depth of this host * to be changeable (on a per device basis). Returns either * the current queue depth setting (may be different from what * was passed in) or an error. An error should only be * returned if the requested depth is legal but the driver was * unable to set it. If the requested depth is illegal, the * driver should set and return the closest legal queue depth. * * Status: OPTIONAL */ int (* change_queue_depth)(struct scsi_device *, int); /* * This functions lets the driver expose the queue mapping * to the block layer. * * Status: OPTIONAL */ void (* map_queues)(struct Scsi_Host *shost); /* * SCSI interface of blk_poll - poll for IO completions. * Only applicable if SCSI LLD exposes multiple h/w queues. * * Return value: Number of completed entries found. * * Status: OPTIONAL */ int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num); /* * Check if scatterlists need to be padded for DMA draining. * * Status: OPTIONAL */ bool (* dma_need_drain)(struct request *rq); /* * This function determines the BIOS parameters for a given * harddisk. These tend to be numbers that are made up by * the host adapter. Parameters: * size, device, list (heads, sectors, cylinders) * * Status: OPTIONAL */ int (* bios_param)(struct scsi_device *, struct block_device *, sector_t, int []); /* * This function is called when one or more partitions on the * device reach beyond the end of the device. * * Status: OPTIONAL */ void (*unlock_native_capacity)(struct scsi_device *); /* * Can be used to export driver statistics and other infos to the * world outside the kernel ie. userspace and it also provides an * interface to feed the driver with information. * * Status: OBSOLETE */ int (*show_info)(struct seq_file *, struct Scsi_Host *); int (*write_info)(struct Scsi_Host *, char *, int); /* * This is an optional routine that allows the transport to become * involved when a scsi io timer fires. The return value tells the * timer routine how to finish the io timeout handling. * * Status: OPTIONAL */ enum scsi_timeout_action (*eh_timed_out)(struct scsi_cmnd *); /* * Optional routine that allows the transport to decide if a cmd * is retryable. Return true if the transport is in a state the * cmd should be retried on. */ bool (*eh_should_retry_cmd)(struct scsi_cmnd *scmd); /* This is an optional routine that allows transport to initiate * LLD adapter or firmware reset using sysfs attribute. * * Return values: 0 on success, -ve value on failure. * * Status: OPTIONAL */ int (*host_reset)(struct Scsi_Host *shost, int reset_type); #define SCSI_ADAPTER_RESET 1 #define SCSI_FIRMWARE_RESET 2 /* * Name of proc directory */ const char *proc_name; /* * This determines if we will use a non-interrupt driven * or an interrupt driven scheme. It is set to the maximum number * of simultaneous commands a single hw queue in HBA will accept. */ int can_queue; /* * In many instances, especially where disconnect / reconnect are * supported, our host also has an ID on the SCSI bus. If this is * the case, then it must be reserved. Please set this_id to -1 if * your setup is in single initiator mode, and the host lacks an * ID. */ int this_id; /* * This determines the degree to which the host adapter is capable * of scatter-gather. */ unsigned short sg_tablesize; unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. */ unsigned int max_sectors; /* * Maximum size in bytes of a single segment. */ unsigned int max_segment_size; unsigned int dma_alignment; /* * DMA scatter gather segment boundary limit. A segment crossing this * boundary will be split in two. */ unsigned long dma_boundary; unsigned long virt_boundary_mask; /* * This specifies "machine infinity" for host templates which don't * limit the transfer size. Note this limit represents an absolute * maximum, and may be over the transfer limits allowed for * individual devices (e.g. 256 for SCSI-1). */ #define SCSI_DEFAULT_MAX_SECTORS 1024 /* * True if this host adapter can make good use of linked commands. * This will allow more than one command to be queued to a given * unit on a given host. Set this to the maximum number of command * blocks to be provided for each device. Set this to 1 for one * command block per lun, 2 for two, etc. Do not set this to 0. * You should make sure that the host adapter will do the right thing * before you try setting this above 1. */ short cmd_per_lun; /* If use block layer to manage tags, this is tag allocation policy */ int tag_alloc_policy; /* * Track QUEUE_FULL events and reduce queue depth on demand. */ unsigned track_queue_depth:1; /* * This specifies the mode that a LLD supports. */ unsigned supported_mode:2; /* * True for emulated SCSI host adapters (e.g. ATAPI). */ unsigned emulated:1; /* * True if the low-level driver performs its own reset-settle delays. */ unsigned skip_settle_delay:1; /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ unsigned queuecommand_may_block:1; /* * Countdown for host blocking with no commands outstanding. */ unsigned int max_host_blocked; /* * Default value for the blocking. If the queue is empty, * host_blocked counts down in the request_fn until it restarts * host operations as zero is reached. * * FIXME: This should probably be a value in the template */ #define SCSI_DEFAULT_HOST_BLOCKED 7 /* * Pointer to the SCSI host sysfs attribute groups, NULL terminated. */ const struct attribute_group **shost_groups; /* * Pointer to the SCSI device attribute groups for this host, * NULL terminated. */ const struct attribute_group **sdev_groups; /* * Vendor Identifier associated with the host * * Note: When specifying vendor_id, be sure to read the * Vendor Type and ID formatting requirements specified in * scsi_netlink.h */ u64 vendor_id; }; /* * Temporary #define for host lock push down. Can be removed when all * drivers have been updated to take advantage of unlocked * queuecommand. * */ #define DEF_SCSI_QCMD(func_name) \ int func_name(struct Scsi_Host *shost, struct scsi_cmnd *cmd) \ { \ unsigned long irq_flags; \ int rc; \ spin_lock_irqsave(shost->host_lock, irq_flags); \ rc = func_name##_lck(cmd); \ spin_unlock_irqrestore(shost->host_lock, irq_flags); \ return rc; \ } /* * shost state: If you alter this, you also need to alter scsi_sysfs.c * (for the ascii descriptions) and the state model enforcer: * scsi_host_set_state() */ enum scsi_host_state { SHOST_CREATED = 1, SHOST_RUNNING, SHOST_CANCEL, SHOST_DEL, SHOST_RECOVERY, SHOST_CANCEL_RECOVERY, SHOST_DEL_RECOVERY, }; struct Scsi_Host { /* * __devices is protected by the host_lock, but you should * usually use scsi_device_lookup / shost_for_each_device * to access it and don't care about locking yourself. * In the rare case of being in irq context you can use * their __ prefixed variants with the lock held. NEVER * access this list directly from a driver. */ struct list_head __devices; struct list_head __targets; struct list_head starved_list; spinlock_t default_lock; spinlock_t *host_lock; struct mutex scan_mutex;/* serialize scanning activity */ struct list_head eh_abort_list; struct list_head eh_cmd_q; struct task_struct * ehandler; /* Error recovery thread. */ struct completion * eh_action; /* Wait for specific actions on the host. */ wait_queue_head_t host_wait; const struct scsi_host_template *hostt; struct scsi_transport_template *transportt; struct kref tagset_refcnt; struct completion tagset_freed; /* Area to keep a shared tag map */ struct blk_mq_tag_set tag_set; atomic_t host_blocked; unsigned int host_failed; /* commands that failed. protected by host_lock */ unsigned int host_eh_scheduled; /* EH scheduled without command */ unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ /* next two fields are used to bound the time spent in error handling */ int eh_deadline; unsigned long last_reset; /* * These three parameters can be used to allow for wide scsi, * and for host adapters that support multiple busses * The last two should be set to 1 more than the actual max id * or lun (e.g. 8 for SCSI parallel systems). */ unsigned int max_channel; unsigned int max_id; u64 max_lun; /* * This is a unique identifier that must be assigned so that we * have some way of identifying each detected host adapter properly * and uniquely. For hosts that do not support more than one card * in the system at one time, this does not need to be set. It is * initialized to 0 in scsi_register. */ unsigned int unique_id; /* * The maximum length of SCSI commands that this host can accept. * Probably 12 for most host adapters, but could be 16 for others. * or 260 if the driver supports variable length cdbs. * For drivers that don't set this field, a value of 12 is * assumed. */ unsigned short max_cmd_len; int this_id; int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; short unsigned int sg_prot_tablesize; unsigned int max_sectors; unsigned int opt_sectors; unsigned int max_segment_size; unsigned int dma_alignment; unsigned long dma_boundary; unsigned long virt_boundary_mask; /* * In scsi-mq mode, the number of hardware queues supported by the LLD. * * Note: it is assumed that each hardware queue has a queue depth of * can_queue. In other words, the total queue depth per host * is nr_hw_queues * can_queue. However, for when host_tagset is set, * the total queue depth is can_queue. */ unsigned nr_hw_queues; unsigned nr_maps; unsigned active_mode:2; /* * Host has requested that no further requests come through for the * time being. */ unsigned host_self_blocked:1; /* * Host uses correct SCSI ordering not PC ordering. The bit is * set for the minority of drivers whose authors actually read * the spec ;). */ unsigned reverse_ordering:1; /* Task mgmt function in progress */ unsigned tmf_in_progress:1; /* Asynchronous scan in progress */ unsigned async_scan:1; /* Don't resume host in EH */ unsigned eh_noresume:1; /* The controller does not support WRITE SAME */ unsigned no_write_same:1; /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ unsigned queuecommand_may_block:1; /* Host responded with short (<36 bytes) INQUIRY result */ unsigned short_inquiry:1; /* The transport requires the LUN bits NOT to be stored in CDB[1] */ unsigned no_scsi2_lun_in_cdb:1; unsigned no_highmem:1; /* * Optional work queue to be utilized by the transport */ char work_q_name[20]; struct workqueue_struct *work_q; /* * Task management function work queue */ struct workqueue_struct *tmf_work_q; /* * Value host_blocked counts down from */ unsigned int max_host_blocked; /* Protection Information */ unsigned int prot_capabilities; unsigned char prot_guard_type; /* legacy crap */ unsigned long base; unsigned long io_port; unsigned char n_io_port; unsigned char dma_channel; unsigned int irq; enum scsi_host_state shost_state; /* ldm bits */ struct device shost_gendev, shost_dev; /* * Points to the transport data (if any) which is allocated * separately */ void *shost_data; /* * Points to the physical bus device we'd use to do DMA * Needed just in case we have virtual hosts. */ struct device *dma_dev; /* Delay for runtime autosuspend */ int rpm_autosuspend_delay; /* * We should ensure that this is aligned, both for better performance * and also because some compilers (m68k) don't automatically force * alignment to a long boundary. */ unsigned long hostdata[] /* Used for storage of host specific stuff */ __attribute__ ((aligned (sizeof(unsigned long)))); }; #define class_to_shost(d) \ container_of(d, struct Scsi_Host, shost_dev) #define shost_printk(prefix, shost, fmt, a...) \ dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a) static inline void *shost_priv(struct Scsi_Host *shost) { return (void *)shost->hostdata; } int scsi_is_host_device(const struct device *); static inline struct Scsi_Host *dev_to_shost(struct device *dev) { while (!scsi_is_host_device(dev)) { if (!dev->parent) return NULL; dev = dev->parent; } return container_of(dev, struct Scsi_Host, shost_gendev); } static inline int scsi_host_in_recovery(struct Scsi_Host *shost) { return shost->shost_state == SHOST_RECOVERY || shost->shost_state == SHOST_CANCEL_RECOVERY || shost->shost_state == SHOST_DEL_RECOVERY || shost->tmf_in_progress; } extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *, int); extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); #if defined(CONFIG_SCSI_PROC_FS) struct proc_dir_entry * scsi_template_proc_dir(const struct scsi_host_template *sht); #else #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); extern int scsi_host_busy(struct Scsi_Host *shost); extern void scsi_host_put(struct Scsi_Host *t); extern struct Scsi_Host *scsi_host_lookup(unsigned int hostnum); extern const char *scsi_host_state_name(enum scsi_host_state); extern void scsi_host_complete_all_commands(struct Scsi_Host *shost, enum scsi_host_status status); static inline int __must_check scsi_add_host(struct Scsi_Host *host, struct device *dev) { return scsi_add_host_with_dma(host, dev, dev); } static inline struct device *scsi_get_device(struct Scsi_Host *shost) { return shost->shost_gendev.parent; } /** * scsi_host_scan_allowed - Is scanning of this host allowed * @shost: Pointer to Scsi_Host. **/ static inline int scsi_host_scan_allowed(struct Scsi_Host *shost) { return shost->shost_state == SHOST_RUNNING || shost->shost_state == SHOST_RECOVERY; } extern void scsi_unblock_requests(struct Scsi_Host *); extern void scsi_block_requests(struct Scsi_Host *); extern int scsi_host_block(struct Scsi_Host *shost); extern int scsi_host_unblock(struct Scsi_Host *shost, int new_state); void scsi_host_busy_iter(struct Scsi_Host *, bool (*fn)(struct scsi_cmnd *, void *), void *priv); struct class_container; /* * DIF defines the exchange of protection information between * initiator and SBC block device. * * DIX defines the exchange of protection information between OS and * initiator. */ enum scsi_host_prot_capabilities { SHOST_DIF_TYPE1_PROTECTION = 1 << 0, /* T10 DIF Type 1 */ SHOST_DIF_TYPE2_PROTECTION = 1 << 1, /* T10 DIF Type 2 */ SHOST_DIF_TYPE3_PROTECTION = 1 << 2, /* T10 DIF Type 3 */ SHOST_DIX_TYPE0_PROTECTION = 1 << 3, /* DIX between OS and HBA only */ SHOST_DIX_TYPE1_PROTECTION = 1 << 4, /* DIX with DIF Type 1 */ SHOST_DIX_TYPE2_PROTECTION = 1 << 5, /* DIX with DIF Type 2 */ SHOST_DIX_TYPE3_PROTECTION = 1 << 6, /* DIX with DIF Type 3 */ }; /* * SCSI hosts which support the Data Integrity Extensions must * indicate their capabilities by setting the prot_capabilities using * this call. */ static inline void scsi_host_set_prot(struct Scsi_Host *shost, unsigned int mask) { shost->prot_capabilities = mask; } static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) { return shost->prot_capabilities; } static inline int scsi_host_prot_dma(struct Scsi_Host *shost) { return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; } static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, SHOST_DIF_TYPE1_PROTECTION, SHOST_DIF_TYPE2_PROTECTION, SHOST_DIF_TYPE3_PROTECTION }; if (target_type >= ARRAY_SIZE(cap)) return 0; return shost->prot_capabilities & cap[target_type] ? target_type : 0; } static inline unsigned int scsi_host_dix_capable(struct Scsi_Host *shost, unsigned int target_type) { #if defined(CONFIG_BLK_DEV_INTEGRITY) static unsigned char cap[] = { SHOST_DIX_TYPE0_PROTECTION, SHOST_DIX_TYPE1_PROTECTION, SHOST_DIX_TYPE2_PROTECTION, SHOST_DIX_TYPE3_PROTECTION }; if (target_type >= ARRAY_SIZE(cap)) return 0; return shost->prot_capabilities & cap[target_type]; #endif return 0; } /* * All DIX-capable initiators must support the T10-mandated CRC * checksum. Controllers can optionally implement the IP checksum * scheme which has much lower impact on system performance. Note * that the main rationale for the checksum is to match integrity * metadata with data. Detecting bit errors are a job for ECC memory * and buses. */ enum scsi_host_guard_type { SHOST_DIX_GUARD_CRC = 1 << 0, SHOST_DIX_GUARD_IP = 1 << 1, }; static inline void scsi_host_set_guard(struct Scsi_Host *shost, unsigned char type) { shost->prot_guard_type = type; } static inline unsigned char scsi_host_get_guard(struct Scsi_Host *shost) { return shost->prot_guard_type; } extern int scsi_host_set_state(struct Scsi_Host *, enum scsi_host_state); #endif /* _SCSI_SCSI_HOST_H */
275 7 4 1 7 7 7 36 36 294 91 294 293 78 19 38 62 78 78 19 19 7 19 19 19 19 78 78 11 78 68 46 8 46 46 46 46 78 78 78 78 78 78 68 78 44 4 78 4 4 4 78 77 78 68 49 19 19 19 19 39 175 218 218 219 24 24 24 10 20 19 19 19 19 195 160 195 195 35 160 35 35 35 160 195 195 195 226 226 226 225 146 110 110 146 19 19 19 19 19 19 19 191 278 4 4 4 4 202 201 5 202 110 24 13 24 23 7 1 7 24 6 6 5 5 8 8 8 275 275 276 190 125 275 18 108 108 192 218 275 1 4706 1 4706 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/swapfile.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie */ #include <linux/blkdev.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/shmem_fs.h> #include <linux/blk-cgroup.h> #include <linux/random.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/backing-dev.h> #include <linux/mutex.h> #include <linux/capability.h> #include <linux/syscalls.h> #include <linux/memcontrol.h> #include <linux/poll.h> #include <linux/oom.h> #include <linux/swapfile.h> #include <linux/export.h> #include <linux/swap_slots.h> #include <linux/sort.h> #include <linux/completion.h> #include <linux/suspend.h> #include <linux/zswap.h> #include <linux/plist.h> #include <asm/tlbflush.h> #include <linux/swapops.h> #include <linux/swap_cgroup.h> #include "internal.h" #include "swap.h" static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); static void free_swap_count_continuations(struct swap_info_struct *); static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; /* * Some modules use swappable objects and may try to swap them out under * memory pressure (via the shrinker). Before doing so, they may wish to * check to see if any swap space is available. */ EXPORT_SYMBOL_GPL(nr_swap_pages); /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority = -1; unsigned long swapfile_maximum_size; #ifdef CONFIG_MIGRATION bool swap_migration_ad_supported; #endif /* CONFIG_MIGRATION */ static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; /* * all active swap_info_structs * protected with swap_lock, and ordered by priority. */ static PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs * protected with swap_avail_lock, ordered by priority. * This is used by folio_alloc_swap() instead of swap_active_head * because swap_active_head includes all swap_info_structs, * but folio_alloc_swap() doesn't need to look at full ones. * This uses its own lock instead of swap_lock because when a * swap_info_struct changes between not-full/full, it needs to * add/remove itself to/from this list, but the swap_info_struct->lock * is held and the locking order requires swap_lock to be taken * before any swap_info_struct->lock. */ static struct plist_head *swap_avail_heads; static DEFINE_SPINLOCK(swap_avail_lock); static struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait); /* Activity counter to indicate that a swapon or swapoff has occurred */ static atomic_t proc_poll_event = ATOMIC_INIT(0); atomic_t nr_rotate_swap = ATOMIC_INIT(0); static struct swap_info_struct *swap_type_to_swap_info(int type) { if (type >= MAX_SWAPFILES) return NULL; return READ_ONCE(swap_info[type]); /* rcu_dereference() */ } static inline unsigned char swap_count(unsigned char ent) { return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ } /* Reclaim the swap entry anyway if possible */ #define TTRS_ANYWAY 0x1 /* * Reclaim the swap entry if there are no more mappings of the * corresponding page */ #define TTRS_UNMAPPED 0x2 /* Reclaim the swap entry if swap is getting full*/ #define TTRS_FULL 0x4 /* * returns number of pages in the folio that backs the swap entry. If positive, * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no * folio was associated with the swap entry. */ static int __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset, unsigned long flags) { swp_entry_t entry = swp_entry(si->type, offset); struct folio *folio; int ret = 0; folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); if (IS_ERR(folio)) return 0; /* * When this function is called from scan_swap_map_slots() and it's * called by vmscan.c at reclaiming folios. So we hold a folio lock * here. We have to use trylock for avoiding deadlock. This is a special * case and you should use folio_free_swap() with explicit folio_lock() * in usual operations. */ if (folio_trylock(folio)) { if ((flags & TTRS_ANYWAY) || ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) || ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio))) ret = folio_free_swap(folio); folio_unlock(folio); } ret = ret ? folio_nr_pages(folio) : -folio_nr_pages(folio); folio_put(folio); return ret; } static inline struct swap_extent *first_se(struct swap_info_struct *sis) { struct rb_node *rb = rb_first(&sis->swap_extent_root); return rb_entry(rb, struct swap_extent, rb_node); } static inline struct swap_extent *next_se(struct swap_extent *se) { struct rb_node *rb = rb_next(&se->rb_node); return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL; } /* * swapon tell device that all the old swap contents can be discarded, * to allow the swap device to optimize its wear-levelling. */ static int discard_swap(struct swap_info_struct *si) { struct swap_extent *se; sector_t start_block; sector_t nr_blocks; int err = 0; /* Do not discard the swap header page! */ se = first_se(si); start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) return err; cond_resched(); } for (se = next_se(se); se; se = next_se(se)) { start_block = se->start_block << (PAGE_SHIFT - 9); nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) break; cond_resched(); } return err; /* That will often be -EOPNOTSUPP */ } static struct swap_extent * offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) { struct swap_extent *se; struct rb_node *rb; rb = sis->swap_extent_root.rb_node; while (rb) { se = rb_entry(rb, struct swap_extent, rb_node); if (offset < se->start_page) rb = rb->rb_left; else if (offset >= se->start_page + se->nr_pages) rb = rb->rb_right; else return se; } /* It *must* be present */ BUG(); } sector_t swap_folio_sector(struct folio *folio) { struct swap_info_struct *sis = swp_swap_info(folio->swap); struct swap_extent *se; sector_t sector; pgoff_t offset; offset = swp_offset(folio->swap); se = offset_to_swap_extent(sis, offset); sector = se->start_block + (offset - se->start_page); return sector << (PAGE_SHIFT - 9); } /* * swap allocation tell device that a cluster of swap can now be discarded, * to allow the swap device to optimize its wear-levelling. */ static void discard_swap_cluster(struct swap_info_struct *si, pgoff_t start_page, pgoff_t nr_pages) { struct swap_extent *se = offset_to_swap_extent(si, start_page); while (nr_pages) { pgoff_t offset = start_page - se->start_page; sector_t start_block = se->start_block + offset; sector_t nr_blocks = se->nr_pages - offset; if (nr_blocks > nr_pages) nr_blocks = nr_pages; start_page += nr_blocks; nr_pages -= nr_blocks; start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_NOIO)) break; se = next_se(se); } } #ifdef CONFIG_THP_SWAP #define SWAPFILE_CLUSTER HPAGE_PMD_NR #define swap_entry_order(order) (order) #else #define SWAPFILE_CLUSTER 256 /* * Define swap_entry_order() as constant to let compiler to optimize * out some code if !CONFIG_THP_SWAP */ #define swap_entry_order(order) 0 #endif #define LATENCY_LIMIT 256 static inline void cluster_set_flag(struct swap_cluster_info *info, unsigned int flag) { info->flags = flag; } static inline unsigned int cluster_count(struct swap_cluster_info *info) { return info->data; } static inline void cluster_set_count(struct swap_cluster_info *info, unsigned int c) { info->data = c; } static inline void cluster_set_count_flag(struct swap_cluster_info *info, unsigned int c, unsigned int f) { info->flags = f; info->data = c; } static inline unsigned int cluster_next(struct swap_cluster_info *info) { return info->data; } static inline void cluster_set_next(struct swap_cluster_info *info, unsigned int n) { info->data = n; } static inline void cluster_set_next_flag(struct swap_cluster_info *info, unsigned int n, unsigned int f) { info->flags = f; info->data = n; } static inline bool cluster_is_free(struct swap_cluster_info *info) { return info->flags & CLUSTER_FLAG_FREE; } static inline bool cluster_is_null(struct swap_cluster_info *info) { return info->flags & CLUSTER_FLAG_NEXT_NULL; } static inline void cluster_set_null(struct swap_cluster_info *info) { info->flags = CLUSTER_FLAG_NEXT_NULL; info->data = 0; } static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, unsigned long offset) { struct swap_cluster_info *ci; ci = si->cluster_info; if (ci) { ci += offset / SWAPFILE_CLUSTER; spin_lock(&ci->lock); } return ci; } static inline void unlock_cluster(struct swap_cluster_info *ci) { if (ci) spin_unlock(&ci->lock); } /* * Determine the locking method in use for this device. Return * swap_cluster_info if SSD-style cluster-based locking is in place. */ static inline struct swap_cluster_info *lock_cluster_or_swap_info( struct swap_info_struct *si, unsigned long offset) { struct swap_cluster_info *ci; /* Try to use fine-grained SSD-style locking if available: */ ci = lock_cluster(si, offset); /* Otherwise, fall back to traditional, coarse locking: */ if (!ci) spin_lock(&si->lock); return ci; } static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si, struct swap_cluster_info *ci) { if (ci) unlock_cluster(ci); else spin_unlock(&si->lock); } static inline bool cluster_list_empty(struct swap_cluster_list *list) { return cluster_is_null(&list->head); } static inline unsigned int cluster_list_first(struct swap_cluster_list *list) { return cluster_next(&list->head); } static void cluster_list_init(struct swap_cluster_list *list) { cluster_set_null(&list->head); cluster_set_null(&list->tail); } static void cluster_list_add_tail(struct swap_cluster_list *list, struct swap_cluster_info *ci, unsigned int idx) { if (cluster_list_empty(list)) { cluster_set_next_flag(&list->head, idx, 0); cluster_set_next_flag(&list->tail, idx, 0); } else { struct swap_cluster_info *ci_tail; unsigned int tail = cluster_next(&list->tail); /* * Nested cluster lock, but both cluster locks are * only acquired when we held swap_info_struct->lock */ ci_tail = ci + tail; spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); cluster_set_next(ci_tail, idx); spin_unlock(&ci_tail->lock); cluster_set_next_flag(&list->tail, idx, 0); } } static unsigned int cluster_list_del_first(struct swap_cluster_list *list, struct swap_cluster_info *ci) { unsigned int idx; idx = cluster_next(&list->head); if (cluster_next(&list->tail) == idx) { cluster_set_null(&list->head); cluster_set_null(&list->tail); } else cluster_set_next_flag(&list->head, cluster_next(&ci[idx]), 0); return idx; } /* Add a cluster to discard list and schedule it to do discard */ static void swap_cluster_schedule_discard(struct swap_info_struct *si, unsigned int idx) { /* * If scan_swap_map_slots() can't find a free cluster, it will check * si->swap_map directly. To make sure the discarding cluster isn't * taken by scan_swap_map_slots(), mark the swap entries bad (occupied). * It will be cleared after discard */ memset(si->swap_map + idx * SWAPFILE_CLUSTER, SWAP_MAP_BAD, SWAPFILE_CLUSTER); cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx); schedule_work(&si->discard_work); } static void __free_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info; cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE); cluster_list_add_tail(&si->free_clusters, ci, idx); } /* * Doing discard actually. After a cluster discard is finished, the cluster * will be added to free cluster list. caller should hold si->lock. */ static void swap_do_scheduled_discard(struct swap_info_struct *si) { struct swap_cluster_info *info, *ci; unsigned int idx; info = si->cluster_info; while (!cluster_list_empty(&si->discard_clusters)) { idx = cluster_list_del_first(&si->discard_clusters, info); spin_unlock(&si->lock); discard_swap_cluster(si, idx * SWAPFILE_CLUSTER, SWAPFILE_CLUSTER); spin_lock(&si->lock); ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); __free_cluster(si, idx); memset(si->swap_map + idx * SWAPFILE_CLUSTER, 0, SWAPFILE_CLUSTER); unlock_cluster(ci); } } static void swap_discard_work(struct work_struct *work) { struct swap_info_struct *si; si = container_of(work, struct swap_info_struct, discard_work); spin_lock(&si->lock); swap_do_scheduled_discard(si); spin_unlock(&si->lock); } static void swap_users_ref_free(struct percpu_ref *ref) { struct swap_info_struct *si; si = container_of(ref, struct swap_info_struct, users); complete(&si->comp); } static void alloc_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info; VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); cluster_list_del_first(&si->free_clusters, ci); cluster_set_count_flag(ci + idx, 0, 0); } static void free_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info + idx; VM_BUG_ON(cluster_count(ci) != 0); /* * If the swap is discardable, prepare discard the cluster * instead of free it immediately. The cluster will be freed * after discard. */ if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == (SWP_WRITEOK | SWP_PAGE_DISCARD)) { swap_cluster_schedule_discard(si, idx); return; } __free_cluster(si, idx); } /* * The cluster corresponding to page_nr will be used. The cluster will be * removed from free cluster list and its usage counter will be increased by * count. */ static void add_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *cluster_info, unsigned long page_nr, unsigned long count) { unsigned long idx = page_nr / SWAPFILE_CLUSTER; if (!cluster_info) return; if (cluster_is_free(&cluster_info[idx])) alloc_cluster(p, idx); VM_BUG_ON(cluster_count(&cluster_info[idx]) + count > SWAPFILE_CLUSTER); cluster_set_count(&cluster_info[idx], cluster_count(&cluster_info[idx]) + count); } /* * The cluster corresponding to page_nr will be used. The cluster will be * removed from free cluster list and its usage counter will be increased by 1. */ static void inc_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *cluster_info, unsigned long page_nr) { add_cluster_info_page(p, cluster_info, page_nr, 1); } /* * The cluster corresponding to page_nr decreases one usage. If the usage * counter becomes 0, which means no page in the cluster is in using, we can * optionally discard the cluster and add it to free cluster list. */ static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *cluster_info, unsigned long page_nr) { unsigned long idx = page_nr / SWAPFILE_CLUSTER; if (!cluster_info) return; VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0); cluster_set_count(&cluster_info[idx], cluster_count(&cluster_info[idx]) - 1); if (cluster_count(&cluster_info[idx]) == 0) free_cluster(p, idx); } /* * It's possible scan_swap_map_slots() uses a free cluster in the middle of free * cluster list. Avoiding such abuse to avoid list corruption. */ static bool scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, unsigned long offset, int order) { struct percpu_cluster *percpu_cluster; bool conflict; offset /= SWAPFILE_CLUSTER; conflict = !cluster_list_empty(&si->free_clusters) && offset != cluster_list_first(&si->free_clusters) && cluster_is_free(&si->cluster_info[offset]); if (!conflict) return false; percpu_cluster = this_cpu_ptr(si->percpu_cluster); percpu_cluster->next[order] = SWAP_NEXT_INVALID; return true; } static inline bool swap_range_empty(char *swap_map, unsigned int start, unsigned int nr_pages) { unsigned int i; for (i = 0; i < nr_pages; i++) { if (swap_map[start + i]) return false; } return true; } /* * Try to get swap entries with specified order from current cpu's swap entry * pool (a cluster). This might involve allocating a new cluster for current CPU * too. */ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, unsigned long *offset, unsigned long *scan_base, int order) { unsigned int nr_pages = 1 << order; struct percpu_cluster *cluster; struct swap_cluster_info *ci; unsigned int tmp, max; new_cluster: cluster = this_cpu_ptr(si->percpu_cluster); tmp = cluster->next[order]; if (tmp == SWAP_NEXT_INVALID) { if (!cluster_list_empty(&si->free_clusters)) { tmp = cluster_next(&si->free_clusters.head) * SWAPFILE_CLUSTER; } else if (!cluster_list_empty(&si->discard_clusters)) { /* * we don't have free cluster but have some clusters in * discarding, do discard now and reclaim them, then * reread cluster_next_cpu since we dropped si->lock */ swap_do_scheduled_discard(si); *scan_base = this_cpu_read(*si->cluster_next_cpu); *offset = *scan_base; goto new_cluster; } else return false; } /* * Other CPUs can use our cluster if they can't find a free cluster, * check if there is still free entry in the cluster, maintaining * natural alignment. */ max = min_t(unsigned long, si->max, ALIGN(tmp + 1, SWAPFILE_CLUSTER)); if (tmp < max) { ci = lock_cluster(si, tmp); while (tmp < max) { if (swap_range_empty(si->swap_map, tmp, nr_pages)) break; tmp += nr_pages; } unlock_cluster(ci); } if (tmp >= max) { cluster->next[order] = SWAP_NEXT_INVALID; goto new_cluster; } *offset = tmp; *scan_base = tmp; tmp += nr_pages; cluster->next[order] = tmp < max ? tmp : SWAP_NEXT_INVALID; return true; } static void __del_from_avail_list(struct swap_info_struct *p) { int nid; assert_spin_locked(&p->lock); for_each_node(nid) plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); } static void del_from_avail_list(struct swap_info_struct *p) { spin_lock(&swap_avail_lock); __del_from_avail_list(p); spin_unlock(&swap_avail_lock); } static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { unsigned int end = offset + nr_entries - 1; if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); WRITE_ONCE(si->inuse_pages, si->inuse_pages + nr_entries); if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; si->highest_bit = 0; del_from_avail_list(si); } } static void add_to_avail_list(struct swap_info_struct *p) { int nid; spin_lock(&swap_avail_lock); for_each_node(nid) plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]); spin_unlock(&swap_avail_lock); } static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); if (offset < si->lowest_bit) si->lowest_bit = offset; if (end > si->highest_bit) { bool was_full = !si->highest_bit; WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } if (si->flags & SWP_BLKDEV) swap_slot_free_notify = si->bdev->bd_disk->fops->swap_slot_free_notify; else swap_slot_free_notify = NULL; while (offset <= end) { arch_swap_invalidate_page(si->type, offset); if (swap_slot_free_notify) swap_slot_free_notify(si->bdev, offset); offset++; } clear_shadow_from_swap_cache(si->type, begin, end); /* * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 * only after the above cleanups are done. */ smp_wmb(); atomic_long_add(nr_entries, &nr_swap_pages); WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); } static void set_cluster_next(struct swap_info_struct *si, unsigned long next) { unsigned long prev; if (!(si->flags & SWP_SOLIDSTATE)) { si->cluster_next = next; return; } prev = this_cpu_read(*si->cluster_next_cpu); /* * Cross the swap address space size aligned trunk, choose * another trunk randomly to avoid lock contention on swap * address space if possible. */ if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) != (next >> SWAP_ADDRESS_SPACE_SHIFT)) { /* No free swap slots available */ if (si->highest_bit <= si->lowest_bit) return; next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit); next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES); next = max_t(unsigned int, next, si->lowest_bit); } this_cpu_write(*si->cluster_next_cpu, next); } static bool swap_offset_available_and_locked(struct swap_info_struct *si, unsigned long offset) { if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); return true; } if (vm_swap_full() && READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); return true; } return false; } static int scan_swap_map_slots(struct swap_info_struct *si, unsigned char usage, int nr, swp_entry_t slots[], int order) { struct swap_cluster_info *ci; unsigned long offset; unsigned long scan_base; unsigned long last_in_cluster = 0; int latency_ration = LATENCY_LIMIT; unsigned int nr_pages = 1 << order; int n_ret = 0; bool scanned_many = false; /* * We try to cluster swap pages by allocating them sequentially * in swap. Once we've allocated SWAPFILE_CLUSTER pages this * way, however, we resort to first-free allocation, starting * a new cluster. This prevents us from scattering swap pages * all over the entire swap partition, so that we reduce * overall disk seek times between swap pages. -- sct * But we do now try to find an empty cluster. -Andrea * And we let swap pages go all over an SSD partition. Hugh */ if (order > 0) { /* * Should not even be attempting large allocations when huge * page swap is disabled. Warn and fail the allocation. */ if (!IS_ENABLED(CONFIG_THP_SWAP) || nr_pages > SWAPFILE_CLUSTER) { VM_WARN_ON_ONCE(1); return 0; } /* * Swapfile is not block device or not using clusters so unable * to allocate large entries. */ if (!(si->flags & SWP_BLKDEV) || !si->cluster_info) return 0; } si->flags += SWP_SCANNING; /* * Use percpu scan base for SSD to reduce lock contention on * cluster and swap cache. For HDD, sequential access is more * important. */ if (si->flags & SWP_SOLIDSTATE) scan_base = this_cpu_read(*si->cluster_next_cpu); else scan_base = si->cluster_next; offset = scan_base; /* SSD algorithm */ if (si->cluster_info) { if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base, order)) { if (order > 0) goto no_page; goto scan; } } else if (unlikely(!si->cluster_nr--)) { if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { si->cluster_nr = SWAPFILE_CLUSTER - 1; goto checks; } spin_unlock(&si->lock); /* * If seek is expensive, start searching for new cluster from * start of partition, to minimize the span of allocated swap. * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info * case, just handled by scan_swap_map_try_ssd_cluster() above. */ scan_base = offset = si->lowest_bit; last_in_cluster = offset + SWAPFILE_CLUSTER - 1; /* Locate the first empty (unaligned) cluster */ for (; last_in_cluster <= READ_ONCE(si->highest_bit); offset++) { if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { spin_lock(&si->lock); offset -= SWAPFILE_CLUSTER - 1; si->cluster_next = offset; si->cluster_nr = SWAPFILE_CLUSTER - 1; goto checks; } if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; } } offset = scan_base; spin_lock(&si->lock); si->cluster_nr = SWAPFILE_CLUSTER - 1; } checks: if (si->cluster_info) { while (scan_swap_map_ssd_cluster_conflict(si, offset, order)) { /* take a break if we already got some slots */ if (n_ret) goto done; if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base, order)) { if (order > 0) goto no_page; goto scan; } } } if (!(si->flags & SWP_WRITEOK)) goto no_page; if (!si->highest_bit) goto no_page; if (offset > si->highest_bit) scan_base = offset = si->lowest_bit; ci = lock_cluster(si, offset); /* reuse swap entry of cache-only swap if not busy. */ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; unlock_cluster(ci); spin_unlock(&si->lock); swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); spin_lock(&si->lock); /* entry was freed successfully, try to use this again */ if (swap_was_freed > 0) goto checks; goto scan; /* check next one */ } if (si->swap_map[offset]) { unlock_cluster(ci); if (!n_ret) goto scan; else goto done; } memset(si->swap_map + offset, usage, nr_pages); add_cluster_info_page(si, si->cluster_info, offset, nr_pages); unlock_cluster(ci); swap_range_alloc(si, offset, nr_pages); slots[n_ret++] = swp_entry(si->type, offset); /* got enough slots or reach max slots? */ if ((n_ret == nr) || (offset >= si->highest_bit)) goto done; /* search for next available slot */ /* time to take a break? */ if (unlikely(--latency_ration < 0)) { if (n_ret) goto done; spin_unlock(&si->lock); cond_resched(); spin_lock(&si->lock); latency_ration = LATENCY_LIMIT; } /* try to get more slots in cluster */ if (si->cluster_info) { if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base, order)) goto checks; if (order > 0) goto done; } else if (si->cluster_nr && !si->swap_map[++offset]) { /* non-ssd case, still more slots in cluster? */ --si->cluster_nr; goto checks; } /* * Even if there's no free clusters available (fragmented), * try to scan a little more quickly with lock held unless we * have scanned too many slots already. */ if (!scanned_many) { unsigned long scan_limit; if (offset < scan_base) scan_limit = scan_base; else scan_limit = si->highest_bit; for (; offset <= scan_limit && --latency_ration > 0; offset++) { if (!si->swap_map[offset]) goto checks; } } done: if (order == 0) set_cluster_next(si, offset + 1); si->flags -= SWP_SCANNING; return n_ret; scan: VM_WARN_ON(order > 0); spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } if (swap_offset_available_and_locked(si, offset)) goto checks; } offset = si->lowest_bit; while (offset < scan_base) { if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } if (swap_offset_available_and_locked(si, offset)) goto checks; offset++; } spin_lock(&si->lock); no_page: si->flags -= SWP_SCANNING; return n_ret; } static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx) { unsigned long offset = idx * SWAPFILE_CLUSTER; struct swap_cluster_info *ci; ci = lock_cluster(si, offset); memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); cluster_set_count_flag(ci, 0, 0); free_cluster(si, idx); unlock_cluster(ci); swap_range_free(si, offset, SWAPFILE_CLUSTER); } int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) { int order = swap_entry_order(entry_order); unsigned long size = 1 << order; struct swap_info_struct *si, *next; long avail_pgs; int n_ret = 0; int node; spin_lock(&swap_avail_lock); avail_pgs = atomic_long_read(&nr_swap_pages) / size; if (avail_pgs <= 0) { spin_unlock(&swap_avail_lock); goto noswap; } n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); atomic_long_sub(n_goal * size, &nr_swap_pages); start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { /* requeue si to after same-priority siblings */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); spin_lock(&si->lock); if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { spin_lock(&swap_avail_lock); if (plist_node_empty(&si->avail_lists[node])) { spin_unlock(&si->lock); goto nextsi; } WARN(!si->highest_bit, "swap_info %d in list but !highest_bit\n", si->type); WARN(!(si->flags & SWP_WRITEOK), "swap_info %d in list but !SWP_WRITEOK\n", si->type); __del_from_avail_list(si); spin_unlock(&si->lock); goto nextsi; } n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal, swp_entries, order); spin_unlock(&si->lock); if (n_ret || size > 1) goto check_out; cond_resched(); spin_lock(&swap_avail_lock); nextsi: /* * if we got here, it's likely that si was almost full before, * and since scan_swap_map_slots() can drop the si->lock, * multiple callers probably all tried to get a page from the * same si and it filled up before we could get one; or, the si * filled up between us dropping swap_avail_lock and taking * si->lock. Since we dropped the swap_avail_lock, the * swap_avail_head list may have been modified; so if next is * still in the swap_avail_head list then try it, otherwise * start over if we have not gotten any slots. */ if (plist_node_empty(&next->avail_lists[node])) goto start_over; } spin_unlock(&swap_avail_lock); check_out: if (n_ret < n_goal) atomic_long_add((long)(n_goal - n_ret) * size, &nr_swap_pages); noswap: return n_ret; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; unsigned long offset; if (!entry.val) goto out; p = swp_swap_info(entry); if (!p) goto bad_nofile; if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) goto bad_offset; if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; bad_free: pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val); goto out; bad_offset: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); goto out; bad_device: pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val); goto out; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; } static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, struct swap_info_struct *q) { struct swap_info_struct *p; p = _swap_info_get(entry); if (p != q) { if (q != NULL) spin_unlock(&q->lock); if (p != NULL) spin_lock(&p->lock); } return p; } static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, unsigned long offset, unsigned char usage) { unsigned char count; unsigned char has_cache; count = p->swap_map[offset]; has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (usage == SWAP_HAS_CACHE) { VM_BUG_ON(!has_cache); has_cache = 0; } else if (count == SWAP_MAP_SHMEM) { /* * Or we could insist on shmem.c using a special * swap_shmem_free() and free_shmem_swap_and_cache()... */ count = 0; } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) { if (count == COUNT_CONTINUED) { if (swap_count_continued(p, offset, count)) count = SWAP_MAP_MAX | COUNT_CONTINUED; else count = SWAP_MAP_MAX; } else count--; } usage = count | has_cache; if (usage) WRITE_ONCE(p->swap_map[offset], usage); else WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } /* * When we get a swap entry, if there aren't some other ways to * prevent swapoff, such as the folio in swap cache is locked, RCU * reader side is locked, etc., the swap entry may become invalid * because of swapoff. Then, we need to enclose all swap related * functions with get_swap_device() and put_swap_device(), unless the * swap functions call get/put_swap_device() by themselves. * * RCU reader side lock (including any spinlock) is sufficient to * prevent swapoff, because synchronize_rcu() is called in swapoff() * before freeing data structures. * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until * put_swap_device() is called. Otherwise return NULL. * * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way * to prevent swapoff. The caller must be prepared for that. For * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() * ... swapoff+swapon * __read_swap_cache_async() * swapcache_prepare() * __swap_duplicate() * // check swap_map * // verify PTE not changed * * In __swap_duplicate(), the swap_map need to be checked before * changing partly because the specified swap entry may be for another * swap device which has been swapoff. And in do_swap_page(), after * the page is read from the swap device, the PTE is verified not * changed with the page table locked to check whether the swap device * has been swapoff or swapoff+swapon. */ struct swap_info_struct *get_swap_device(swp_entry_t entry) { struct swap_info_struct *si; unsigned long offset; if (!entry.val) goto out; si = swp_swap_info(entry); if (!si) goto bad_nofile; if (!percpu_ref_tryget_live(&si->users)) goto out; /* * Guarantee the si->users are checked before accessing other * fields of swap_info_struct. * * Paired with the spin_unlock() after setup_swap_info() in * enable_swap_info(). */ smp_rmb(); offset = swp_offset(entry); if (offset >= si->max) goto put_out; return si; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; put_out: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); percpu_ref_put(&si->users); return NULL; } static unsigned char __swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char usage; ci = lock_cluster_or_swap_info(p, offset); usage = __swap_entry_free_locked(p, offset, 1); unlock_cluster_or_swap_info(p, ci); if (!usage) free_swap_slot(entry); return usage; } static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char count; ci = lock_cluster(p, offset); count = p->swap_map[offset]; VM_BUG_ON(count != SWAP_HAS_CACHE); p->swap_map[offset] = 0; dec_cluster_info_page(p, p->cluster_info, offset); unlock_cluster(ci); mem_cgroup_uncharge_swap(entry, 1); swap_range_free(p, offset, 1); } static void cluster_swap_free_nr(struct swap_info_struct *sis, unsigned long offset, int nr_pages) { struct swap_cluster_info *ci; DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 }; int i, nr; ci = lock_cluster_or_swap_info(sis, offset); while (nr_pages) { nr = min(BITS_PER_LONG, nr_pages); for (i = 0; i < nr; i++) { if (!__swap_entry_free_locked(sis, offset + i, 1)) bitmap_set(to_free, i, 1); } if (!bitmap_empty(to_free, BITS_PER_LONG)) { unlock_cluster_or_swap_info(sis, ci); for_each_set_bit(i, to_free, BITS_PER_LONG) free_swap_slot(swp_entry(sis->type, offset + i)); if (nr == nr_pages) return; bitmap_clear(to_free, 0, BITS_PER_LONG); ci = lock_cluster_or_swap_info(sis, offset); } offset += nr; nr_pages -= nr; } unlock_cluster_or_swap_info(sis, ci); } /* * Caller has made sure that the swap device corresponding to entry * is still around or has not been recycled. */ void swap_free_nr(swp_entry_t entry, int nr_pages) { int nr; struct swap_info_struct *sis; unsigned long offset = swp_offset(entry); sis = _swap_info_get(entry); if (!sis) return; while (nr_pages) { nr = min_t(int, nr_pages, SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); cluster_swap_free_nr(sis, offset, nr); offset += nr; nr_pages -= nr; } } /* * Called after dropping swapcache to decrease refcnt to swap entries. */ void put_swap_folio(struct folio *folio, swp_entry_t entry) { unsigned long offset = swp_offset(entry); unsigned long idx = offset / SWAPFILE_CLUSTER; struct swap_cluster_info *ci; struct swap_info_struct *si; unsigned char *map; unsigned int i, free_entries = 0; unsigned char val; int size = 1 << swap_entry_order(folio_order(folio)); si = _swap_info_get(entry); if (!si) return; ci = lock_cluster_or_swap_info(si, offset); if (size == SWAPFILE_CLUSTER) { map = si->swap_map + offset; for (i = 0; i < SWAPFILE_CLUSTER; i++) { val = map[i]; VM_BUG_ON(!(val & SWAP_HAS_CACHE)); if (val == SWAP_HAS_CACHE) free_entries++; } if (free_entries == SWAPFILE_CLUSTER) { unlock_cluster_or_swap_info(si, ci); spin_lock(&si->lock); mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER); swap_free_cluster(si, idx); spin_unlock(&si->lock); return; } } for (i = 0; i < size; i++, entry.val++) { if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) { unlock_cluster_or_swap_info(si, ci); free_swap_slot(entry); if (i == size - 1) return; lock_cluster_or_swap_info(si, offset); } } unlock_cluster_or_swap_info(si, ci); } static int swp_entry_cmp(const void *ent1, const void *ent2) { const swp_entry_t *e1 = ent1, *e2 = ent2; return (int)swp_type(*e1) - (int)swp_type(*e2); } void swapcache_free_entries(swp_entry_t *entries, int n) { struct swap_info_struct *p, *prev; int i; if (n <= 0) return; prev = NULL; p = NULL; /* * Sort swap entries by swap device, so each lock is only taken once. * nr_swapfiles isn't absolutely correct, but the overhead of sort() is * so low that it isn't necessary to optimize further. */ if (nr_swapfiles > 1) sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL); for (i = 0; i < n; ++i) { p = swap_info_get_cont(entries[i], prev); if (p) swap_entry_free(p, entries[i]); prev = p; } if (p) spin_unlock(&p->lock); } int __swap_count(swp_entry_t entry) { struct swap_info_struct *si = swp_swap_info(entry); pgoff_t offset = swp_offset(entry); return swap_count(si->swap_map[offset]); } /* * How many references to @entry are currently swapped out? * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; int count; ci = lock_cluster_or_swap_info(si, offset); count = swap_count(si->swap_map[offset]); unlock_cluster_or_swap_info(si, ci); return count; } /* * How many references to @entry are currently swapped out? * This considers COUNT_CONTINUED so it returns exact answer. */ int swp_swapcount(swp_entry_t entry) { int count, tmp_count, n; struct swap_info_struct *p; struct swap_cluster_info *ci; struct page *page; pgoff_t offset; unsigned char *map; p = _swap_info_get(entry); if (!p) return 0; offset = swp_offset(entry); ci = lock_cluster_or_swap_info(p, offset); count = swap_count(p->swap_map[offset]); if (!(count & COUNT_CONTINUED)) goto out; count &= ~COUNT_CONTINUED; n = SWAP_MAP_MAX + 1; page = vmalloc_to_page(p->swap_map + offset); offset &= ~PAGE_MASK; VM_BUG_ON(page_private(page) != SWP_CONTINUED); do { page = list_next_entry(page, lru); map = kmap_local_page(page); tmp_count = map[offset]; kunmap_local(map); count += (tmp_count & ~COUNT_CONTINUED) * n; n *= (SWAP_CONT_MAX + 1); } while (tmp_count & COUNT_CONTINUED); out: unlock_cluster_or_swap_info(p, ci); return count; } static bool swap_page_trans_huge_swapped(struct swap_info_struct *si, swp_entry_t entry, int order) { struct swap_cluster_info *ci; unsigned char *map = si->swap_map; unsigned int nr_pages = 1 << order; unsigned long roffset = swp_offset(entry); unsigned long offset = round_down(roffset, nr_pages); int i; bool ret = false; ci = lock_cluster_or_swap_info(si, offset); if (!ci || nr_pages == 1) { if (swap_count(map[roffset])) ret = true; goto unlock_out; } for (i = 0; i < nr_pages; i++) { if (swap_count(map[offset + i])) { ret = true; break; } } unlock_out: unlock_cluster_or_swap_info(si, ci); return ret; } static bool folio_swapped(struct folio *folio) { swp_entry_t entry = folio->swap; struct swap_info_struct *si = _swap_info_get(entry); if (!si) return false; if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) return swap_swapcount(si, entry) != 0; return swap_page_trans_huge_swapped(si, entry, folio_order(folio)); } /** * folio_free_swap() - Free the swap space used for this folio. * @folio: The folio to remove. * * If swap is getting full, or if there are no more mappings of this folio, * then call folio_free_swap to free its swap space. * * Return: true if we were able to release the swap space. */ bool folio_free_swap(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (!folio_test_swapcache(folio)) return false; if (folio_test_writeback(folio)) return false; if (folio_swapped(folio)) return false; /* * Once hibernation has begun to create its image of memory, * there's a danger that one of the calls to folio_free_swap() * - most probably a call from __try_to_reclaim_swap() while * hibernation is allocating its own swap pages for the image, * but conceivably even a call from memory reclaim - will free * the swap from a folio which has already been recorded in the * image as a clean swapcache folio, and then reuse its swap for * another page of the image. On waking from hibernation, the * original folio might be freed under memory pressure, then * later read back in from swap, now with the wrong data. * * Hibernation suspends storage while it is writing the image * to disk so check that here. */ if (pm_suspended_storage()) return false; delete_from_swap_cache(folio); folio_set_dirty(folio); return true; } /** * free_swap_and_cache_nr() - Release reference on range of swap entries and * reclaim their cache if no more references remain. * @entry: First entry of range. * @nr: Number of entries in range. * * For each swap entry in the contiguous range, release a reference. If any swap * entries become free, try to reclaim their underlying folios, if present. The * offset range is defined by [entry.offset, entry.offset + nr). */ void free_swap_and_cache_nr(swp_entry_t entry, int nr) { const unsigned long start_offset = swp_offset(entry); const unsigned long end_offset = start_offset + nr; unsigned int type = swp_type(entry); struct swap_info_struct *si; bool any_only_cache = false; unsigned long offset; unsigned char count; if (non_swap_entry(entry)) return; si = get_swap_device(entry); if (!si) return; if (WARN_ON(end_offset > si->max)) goto out; /* * First free all entries in the range. */ for (offset = start_offset; offset < end_offset; offset++) { if (data_race(si->swap_map[offset])) { count = __swap_entry_free(si, swp_entry(type, offset)); if (count == SWAP_HAS_CACHE) any_only_cache = true; } else { WARN_ON_ONCE(1); } } /* * Short-circuit the below loop if none of the entries had their * reference drop to zero. */ if (!any_only_cache) goto out; /* * Now go back over the range trying to reclaim the swap cache. This is * more efficient for large folios because we will only try to reclaim * the swap once per folio in the common case. If we do * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the * latter will get a reference and lock the folio for every individual * page but will only succeed once the swap slot for every subpage is * zero. */ for (offset = start_offset; offset < end_offset; offset += nr) { nr = 1; if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { /* * Folios are always naturally aligned in swap so * advance forward to the next boundary. Zero means no * folio was found for the swap entry, so advance by 1 * in this case. Negative value means folio was found * but could not be reclaimed. Here we can still advance * to the next boundary. */ nr = __try_to_reclaim_swap(si, offset, TTRS_UNMAPPED | TTRS_FULL); if (nr == 0) nr = 1; else if (nr < 0) nr = -nr; nr = ALIGN(offset + 1, nr) - offset; } } out: put_swap_device(si); } #ifdef CONFIG_HIBERNATION swp_entry_t get_swap_page_of_type(int type) { struct swap_info_struct *si = swap_type_to_swap_info(type); swp_entry_t entry = {0}; if (!si) goto fail; /* This is called for allocating swap entry, not cache */ spin_lock(&si->lock); if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) atomic_long_dec(&nr_swap_pages); spin_unlock(&si->lock); fail: return entry; } /* * Find the swap type that corresponds to given device (if any). * * @offset - number of the PAGE_SIZE-sized block of the device, starting * from 0, in which the swap header is expected to be located. * * This is needed for the suspend to disk (aka swsusp). */ int swap_type_of(dev_t device, sector_t offset) { int type; if (!device) return -1; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; if (device == sis->bdev->bd_dev) { struct swap_extent *se = first_se(sis); if (se->start_block == offset) { spin_unlock(&swap_lock); return type; } } } spin_unlock(&swap_lock); return -ENODEV; } int find_first_swap(dev_t *device) { int type; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; *device = sis->bdev->bd_dev; spin_unlock(&swap_lock); return type; } spin_unlock(&swap_lock); return -ENODEV; } /* * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev * corresponding to given index in swap_info (swap type). */ sector_t swapdev_block(int type, pgoff_t offset) { struct swap_info_struct *si = swap_type_to_swap_info(type); struct swap_extent *se; if (!si || !(si->flags & SWP_WRITEOK)) return 0; se = offset_to_swap_extent(si, offset); return se->start_block + (offset - se->start_page); } /* * Return either the total number of swap pages of given type, or the number * of free pages of that type (depending on @free) * * This is needed for software suspend */ unsigned int count_swap_pages(int type, int free) { unsigned int n = 0; spin_lock(&swap_lock); if ((unsigned int)type < nr_swapfiles) { struct swap_info_struct *sis = swap_info[type]; spin_lock(&sis->lock); if (sis->flags & SWP_WRITEOK) { n = sis->pages; if (free) n -= sis->inuse_pages; } spin_unlock(&sis->lock); } spin_unlock(&swap_lock); return n; } #endif /* CONFIG_HIBERNATION */ static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) { return pte_same(pte_swp_clear_flags(pte), swp_pte); } /* * No need to decide whether this PTE shares the swap entry with others, * just let do_wp_page work it out if a write is requested later - to * force COW, vm_page_prot omits write permission from any private vma. */ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct folio *folio) { struct page *page; struct folio *swapcache; spinlock_t *ptl; pte_t *pte, new_pte, old_pte; bool hwpoisoned = false; int ret = 1; swapcache = folio; folio = ksm_might_need_to_copy(folio, vma, addr); if (unlikely(!folio)) return -ENOMEM; else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { hwpoisoned = true; folio = swapcache; } page = folio_file_page(folio, swp_offset(entry)); if (PageHWPoison(page)) hwpoisoned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), swp_entry_to_pte(entry)))) { ret = 0; goto out; } old_pte = ptep_get(pte); if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) { swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); if (hwpoisoned) { swp_entry = make_hwpoison_entry(page); } else { swp_entry = make_poisoned_swp_entry(); } new_pte = swp_entry_to_pte(swp_entry); ret = 0; goto setpte; } /* * Some architectures may have to restore extra metadata to the page * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ arch_swap_restore(folio_swap(entry, folio), folio); dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_get(folio); if (folio == swapcache) { rmap_t rmap_flags = RMAP_NONE; /* * See do_swap_page(): writeback would be problematic. * However, we do a folio_wait_writeback() just before this * call and have the folio locked. */ VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); if (pte_swp_exclusive(old_pte)) rmap_flags |= RMAP_EXCLUSIVE; /* * We currently only expect small !anon folios, which are either * fully exclusive or fully shared. If we ever get large folios * here, we have to be careful. */ if (!folio_test_anon(folio)) { VM_WARN_ON_ONCE(folio_test_large(folio)); VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); folio_add_new_anon_rmap(folio, vma, addr, rmap_flags); } else { folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); } } else { /* ksm created a completely new copy */ folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); } new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); if (pte_swp_soft_dirty(old_pte)) new_pte = pte_mksoft_dirty(new_pte); if (pte_swp_uffd_wp(old_pte)) new_pte = pte_mkuffd_wp(new_pte); setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: if (pte) pte_unmap_unlock(pte, ptl); if (folio != swapcache) { folio_unlock(folio); folio_put(folio); } return ret; } static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned int type) { pte_t *pte = NULL; struct swap_info_struct *si; si = swap_info[type]; do { struct folio *folio; unsigned long offset; unsigned char swp_count; swp_entry_t entry; int ret; pte_t ptent; if (!pte++) { pte = pte_offset_map(pmd, addr); if (!pte) break; } ptent = ptep_get_lockless(pte); if (!is_swap_pte(ptent)) continue; entry = pte_to_swp_entry(ptent); if (swp_type(entry) != type) continue; offset = swp_offset(entry); pte_unmap(pte); pte = NULL; folio = swap_cache_get_folio(entry, vma, addr); if (!folio) { struct page *page; struct vm_fault vmf = { .vma = vma, .address = addr, .real_address = addr, .pmd = pmd, }; page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf); if (page) folio = page_folio(page); } if (!folio) { swp_count = READ_ONCE(si->swap_map[offset]); if (swp_count == 0 || swp_count == SWAP_MAP_BAD) continue; return -ENOMEM; } folio_lock(folio); folio_wait_writeback(folio); ret = unuse_pte(vma, pmd, addr, entry, folio); if (ret < 0) { folio_unlock(folio); folio_put(folio); return ret; } folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } while (addr += PAGE_SIZE, addr != end); if (pte) pte_unmap(pte); return 0; } static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, unsigned int type) { pmd_t *pmd; unsigned long next; int ret; pmd = pmd_offset(pud, addr); do { cond_resched(); next = pmd_addr_end(addr, end); ret = unuse_pte_range(vma, pmd, addr, next, type); if (ret) return ret; } while (pmd++, addr = next, addr != end); return 0; } static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned int type) { pud_t *pud; unsigned long next; int ret; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; ret = unuse_pmd_range(vma, pud, addr, next, type); if (ret) return ret; } while (pud++, addr = next, addr != end); return 0; } static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned int type) { p4d_t *p4d; unsigned long next; int ret; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; ret = unuse_pud_range(vma, p4d, addr, next, type); if (ret) return ret; } while (p4d++, addr = next, addr != end); return 0; } static int unuse_vma(struct vm_area_struct *vma, unsigned int type) { pgd_t *pgd; unsigned long addr, end, next; int ret; addr = vma->vm_start; end = vma->vm_end; pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; ret = unuse_p4d_range(vma, pgd, addr, next, type); if (ret) return ret; } while (pgd++, addr = next, addr != end); return 0; } static int unuse_mm(struct mm_struct *mm, unsigned int type) { struct vm_area_struct *vma; int ret = 0; VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); for_each_vma(vmi, vma) { if (vma->anon_vma) { ret = unuse_vma(vma, type); if (ret) break; } cond_resched(); } mmap_read_unlock(mm); return ret; } /* * Scan swap_map from current position to next entry still in use. * Return 0 if there are no inuse entries after prev till end of * the map. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, unsigned int prev) { unsigned int i; unsigned char count; /* * No need for swap_lock here: we're just looking * for whether an entry is in use, not modifying it; false * hits are okay, and sys_swapoff() has already prevented new * allocations from this area (while holding swap_lock). */ for (i = prev + 1; i < si->max; i++) { count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) break; if ((i % LATENCY_LIMIT) == 0) cond_resched(); } if (i == si->max) i = 0; return i; } static int try_to_unuse(unsigned int type) { struct mm_struct *prev_mm; struct mm_struct *mm; struct list_head *p; int retval = 0; struct swap_info_struct *si = swap_info[type]; struct folio *folio; swp_entry_t entry; unsigned int i; if (!READ_ONCE(si->inuse_pages)) goto success; retry: retval = shmem_unuse(type); if (retval) return retval; prev_mm = &init_mm; mmget(prev_mm); spin_lock(&mmlist_lock); p = &init_mm.mmlist; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) continue; spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; retval = unuse_mm(mm, type); if (retval) { mmput(prev_mm); return retval; } /* * Make sure that we aren't completely killing * interactive performance. */ cond_resched(); spin_lock(&mmlist_lock); } spin_unlock(&mmlist_lock); mmput(prev_mm); i = 0; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && (i = find_next_to_unuse(si, i)) != 0) { entry = swp_entry(type, i); folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); if (IS_ERR(folio)) continue; /* * It is conceivable that a racing task removed this folio from * swap cache just before we acquired the page lock. The folio * might even be back in swap cache on another swap area. But * that is okay, folio_free_swap() only removes stale folios. */ folio_lock(folio); folio_wait_writeback(folio); folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } /* * Lets check again to see if there are still swap entries in the map. * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * * Limit the number of retries? No: when mmget_not_zero() * above fails, that mm is likely to be freeing swap from * exit_mmap(), which proceeds at its own independent pace; * and even shmem_writepage() could have been preempted after * folio_alloc_swap(), temporarily hiding that swap. It's easy * and robust (though cpu-intensive) just to keep retrying. */ if (READ_ONCE(si->inuse_pages)) { if (!signal_pending(current)) goto retry; return -EINTR; } success: /* * Make sure that further cleanups after try_to_unuse() returns happen * after swap_range_free() reduces si->inuse_pages to 0. */ smp_mb(); return 0; } /* * After a successful try_to_unuse, if no swap is now in use, we know * we can empty the mmlist. swap_lock must be held on entry and exit. * Note that mmlist_lock nests inside swap_lock, and an mm must be * added to the mmlist just after page_duplicate - before would be racy. */ static void drain_mmlist(void) { struct list_head *p, *next; unsigned int type; for (type = 0; type < nr_swapfiles; type++) if (swap_info[type]->inuse_pages) return; spin_lock(&mmlist_lock); list_for_each_safe(p, next, &init_mm.mmlist) list_del_init(p); spin_unlock(&mmlist_lock); } /* * Free all of a swapdev's extent information */ static void destroy_swap_extents(struct swap_info_struct *sis) { while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { struct rb_node *rb = sis->swap_extent_root.rb_node; struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); rb_erase(rb, &sis->swap_extent_root); kfree(se); } if (sis->flags & SWP_ACTIVATED) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; sis->flags &= ~SWP_ACTIVATED; if (mapping->a_ops->swap_deactivate) mapping->a_ops->swap_deactivate(swap_file); } } /* * Add a block range (and the corresponding page range) into this swapdev's * extent tree. * * This function rather assumes that it is called in ascending page order. */ int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) { struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; struct swap_extent *se; struct swap_extent *new_se; /* * place the new node at the right most since the * function is called in ascending page order. */ while (*link) { parent = *link; link = &parent->rb_right; } if (parent) { se = rb_entry(parent, struct swap_extent, rb_node); BUG_ON(se->start_page + se->nr_pages != start_page); if (se->start_block + se->nr_pages == start_block) { /* Merge it */ se->nr_pages += nr_pages; return 0; } } /* No merge, insert a new extent. */ new_se = kmalloc(sizeof(*se), GFP_KERNEL); if (new_se == NULL) return -ENOMEM; new_se->start_page = start_page; new_se->nr_pages = nr_pages; new_se->start_block = start_block; rb_link_node(&new_se->rb_node, parent, link); rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); return 1; } EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks. A rbtree of swap extents is * built at swapon time and is then used at swap_writepage/swap_read_folio * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. * This is done so that the main operating code can treat S_ISBLK and S_ISREG * swap files identically. * * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap * extent rbtree operates in PAGE_SIZE disk blocks. Both S_ISREG and S_ISBLK * swapfiles are handled *identically* after swapon time. * * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks * and will parse them into a rbtree, in PAGE_SIZE chunks. If some stray * blocks are found which do not fall within the PAGE_SIZE alignment * requirements, they are simply tossed out - we will never use those blocks * for swapping. * * For all swap devices we set S_SWAPFILE across the life of the swapon. This * prevents users from writing to the swap device, which will corrupt memory. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of * extents in the rbtree. - akpm. */ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; int ret; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); *span = sis->pages; return ret; } if (mapping->a_ops->swap_activate) { ret = mapping->a_ops->swap_activate(sis, swap_file, span); if (ret < 0) return ret; sis->flags |= SWP_ACTIVATED; if ((sis->flags & SWP_FS_OPS) && sio_pool_init() != 0) { destroy_swap_extents(sis); return -ENOMEM; } return ret; } return generic_swapfile_activate(sis, swap_file, span); } static int swap_node(struct swap_info_struct *p) { struct block_device *bdev; if (p->bdev) bdev = p->bdev; else bdev = p->swap_file->f_inode->i_sb->s_bdev; return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; } static void setup_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { int i; if (prio >= 0) p->prio = prio; else p->prio = --least_priority; /* * the plist prio is negated because plist ordering is * low-to-high, while swap ordering is high-to-low */ p->list.prio = -p->prio; for_each_node(i) { if (p->prio >= 0) p->avail_lists[i].prio = -p->prio; else { if (swap_node(p) == i) p->avail_lists[i].prio = 1; else p->avail_lists[i].prio = -p->prio; } } p->swap_map = swap_map; p->cluster_info = cluster_info; } static void _enable_swap_info(struct swap_info_struct *p) { p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; assert_spin_locked(&swap_lock); /* * both lists are plists, and thus priority ordered. * swap_active_head needs to be priority ordered for swapoff(), * which on removal of any swap_info_struct with an auto-assigned * (i.e. negative) priority increments the auto-assigned priority * of any lower-priority swap_info_structs. * swap_avail_head needs to be priority ordered for folio_alloc_swap(), * which allocates swap pages from the highest available priority * swap_info_struct. */ plist_add(&p->list, &swap_active_head); /* add to available list iff swap device is not full */ if (p->highest_bit) add_to_avail_list(p); } static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, prio, swap_map, cluster_info); spin_unlock(&p->lock); spin_unlock(&swap_lock); /* * Finished initializing swap device, now it's safe to reference it. */ percpu_ref_resurrect(&p->users); spin_lock(&swap_lock); spin_lock(&p->lock); _enable_swap_info(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); } static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); _enable_swap_info(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); } static bool __has_usable_swap(void) { return !plist_head_empty(&swap_active_head); } bool has_usable_swap(void) { bool ret; spin_lock(&swap_lock); ret = __has_usable_swap(); spin_unlock(&swap_lock); return ret; } SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; struct swap_cluster_info *cluster_info; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; struct filename *pathname; int err, found = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; BUG_ON(!current->mm); pathname = getname(specialfile); if (IS_ERR(pathname)) return PTR_ERR(pathname); victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0); err = PTR_ERR(victim); if (IS_ERR(victim)) goto out; mapping = victim->f_mapping; spin_lock(&swap_lock); plist_for_each_entry(p, &swap_active_head, list) { if (p->flags & SWP_WRITEOK) { if (p->swap_file->f_mapping == mapping) { found = 1; break; } } } if (!found) { err = -EINVAL; spin_unlock(&swap_lock); goto out_dput; } if (!security_vm_enough_memory_mm(current->mm, p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; spin_unlock(&swap_lock); goto out_dput; } spin_lock(&p->lock); del_from_avail_list(p); if (p->prio < 0) { struct swap_info_struct *si = p; int nid; plist_for_each_entry_continue(si, &swap_active_head, list) { si->prio++; si->list.prio--; for_each_node(nid) { if (si->avail_lists[nid].prio != 1) si->avail_lists[nid].prio--; } } least_priority++; } plist_del(&p->list, &swap_active_head); atomic_long_sub(p->pages, &nr_swap_pages); total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; spin_unlock(&p->lock); spin_unlock(&swap_lock); disable_swap_slots_cache_lock(); set_current_oom_origin(); err = try_to_unuse(p->type); clear_current_oom_origin(); if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); reenable_swap_slots_cache_unlock(); goto out_dput; } reenable_swap_slots_cache_unlock(); /* * Wait for swap operations protected by get/put_swap_device() * to complete. Because of synchronize_rcu() here, all swap * operations protected by RCU reader side lock (including any * spinlock) will be waited too. This makes it easy to * prevent folio_test_swapcache() and the following swap cache * operations from racing with swapoff. */ percpu_ref_kill(&p->users); synchronize_rcu(); wait_for_completion(&p->comp); flush_work(&p->discard_work); destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) free_swap_count_continuations(p); if (!p->bdev || !bdev_nonrot(p->bdev)) atomic_dec(&nr_rotate_swap); mutex_lock(&swapon_mutex); spin_lock(&swap_lock); spin_lock(&p->lock); drain_mmlist(); /* wait for anyone still in scan_swap_map_slots */ p->highest_bit = 0; /* cuts scans short */ while (p->flags >= SWP_SCANNING) { spin_unlock(&p->lock); spin_unlock(&swap_lock); schedule_timeout_uninterruptible(1); spin_lock(&swap_lock); spin_lock(&p->lock); } swap_file = p->swap_file; p->swap_file = NULL; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; cluster_info = p->cluster_info; p->cluster_info = NULL; spin_unlock(&p->lock); spin_unlock(&swap_lock); arch_swap_invalidate_area(p->type); zswap_swapoff(p->type); mutex_unlock(&swapon_mutex); free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; free_percpu(p->cluster_next_cpu); p->cluster_next_cpu = NULL; vfree(swap_map); kvfree(cluster_info); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); exit_swap_address_space(p->type); inode = mapping->host; inode_lock(inode); inode->i_flags &= ~S_SWAPFILE; inode_unlock(inode); filp_close(swap_file, NULL); /* * Clear the SWP_USED flag after all resources are freed so that swapon * can reuse this swap_info in alloc_swap_info() safely. It is ok to * not hold p->lock after we cleared its SWP_WRITEOK. */ spin_lock(&swap_lock); p->flags = 0; spin_unlock(&swap_lock); err = 0; atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); out_dput: filp_close(victim, NULL); out: putname(pathname); return err; } #ifdef CONFIG_PROC_FS static __poll_t swaps_poll(struct file *file, poll_table *wait) { struct seq_file *seq = file->private_data; poll_wait(file, &proc_poll_wait, wait); if (seq->poll_event != atomic_read(&proc_poll_event)) { seq->poll_event = atomic_read(&proc_poll_event); return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } return EPOLLIN | EPOLLRDNORM; } /* iterator */ static void *swap_start(struct seq_file *swap, loff_t *pos) { struct swap_info_struct *si; int type; loff_t l = *pos; mutex_lock(&swapon_mutex); if (!l) return SEQ_START_TOKEN; for (type = 0; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; if (!--l) return si; } return NULL; } static void *swap_next(struct seq_file *swap, void *v, loff_t *pos) { struct swap_info_struct *si = v; int type; if (v == SEQ_START_TOKEN) type = 0; else type = si->type + 1; ++(*pos); for (; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; return si; } return NULL; } static void swap_stop(struct seq_file *swap, void *v) { mutex_unlock(&swapon_mutex); } static int swap_show(struct seq_file *swap, void *v) { struct swap_info_struct *si = v; struct file *file; int len; unsigned long bytes, inuse; if (si == SEQ_START_TOKEN) { seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); return 0; } bytes = K(si->pages); inuse = K(READ_ONCE(si->inuse_pages)); file = si->swap_file; len = seq_file_path(swap, file, " \t\n\\"); seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", bytes, bytes < 10000000 ? "\t" : "", inuse, inuse < 10000000 ? "\t" : "", si->prio); return 0; } static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, .show = swap_show }; static int swaps_open(struct inode *inode, struct file *file) { struct seq_file *seq; int ret; ret = seq_open(file, &swaps_op); if (ret) return ret; seq = file->private_data; seq->poll_event = atomic_read(&proc_poll_event); return 0; } static const struct proc_ops swaps_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = swaps_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, .proc_poll = swaps_poll, }; static int __init procswaps_init(void) { proc_create("swaps", 0, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); #endif /* CONFIG_PROC_FS */ #ifdef MAX_SWAPFILES_CHECK static int __init max_swapfiles_check(void) { MAX_SWAPFILES_CHECK(); return 0; } late_initcall(max_swapfiles_check); #endif static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; struct swap_info_struct *defer = NULL; unsigned int type; int i; p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (percpu_ref_init(&p->users, swap_users_ref_free, PERCPU_REF_INIT_DEAD, GFP_KERNEL)) { kvfree(p); return ERR_PTR(-ENOMEM); } spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { if (!(swap_info[type]->flags & SWP_USED)) break; } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); percpu_ref_exit(&p->users); kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { p->type = type; /* * Publish the swap_info_struct after initializing it. * Note that kvzalloc() above zeroes all its fields. */ smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */ nr_swapfiles++; } else { defer = p; p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() * would be relying on p->type to remain valid. */ } p->swap_extent_root = RB_ROOT; plist_node_init(&p->list, 0); for_each_node(i) plist_node_init(&p->avail_lists[i], 0); p->flags = SWP_USED; spin_unlock(&swap_lock); if (defer) { percpu_ref_exit(&defer->users); kvfree(defer); } spin_lock_init(&p->lock); spin_lock_init(&p->cont_lock); init_completion(&p->comp); return p; } static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) { if (S_ISBLK(inode->i_mode)) { p->bdev = I_BDEV(inode); /* * Zoned block devices contain zones that have a sequential * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ if (bdev_is_zoned(p->bdev)) return -EINVAL; p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; } return 0; } /* * Find out how many pages are allowed for a single swap device. There * are two limiting factors: * 1) the number of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte, as defined by the different * architectures. * * In order to find the largest possible bit mask, a swap entry with * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap offset is * extracted. * * This will mask all the bits from the initial ~0UL mask that can't * be encoded in either the swp_entry_t or the architecture definition * of a swap pte. */ unsigned long generic_max_swapfile_size(void) { return swp_offset(pte_to_swp_entry( swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; } /* Can be overridden by an architecture for additional checks. */ __weak unsigned long arch_max_swapfile_size(void) { return generic_max_swapfile_size(); } static unsigned long read_swap_header(struct swap_info_struct *p, union swap_header *swap_header, struct inode *inode) { int i; unsigned long maxpages; unsigned long swapfilepages; unsigned long last_page; if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { pr_err("Unable to find swap-space signature\n"); return 0; } /* swap partition endianness hack... */ if (swab32(swap_header->info.version) == 1) { swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } /* Check the swap header's sub-version */ if (swap_header->info.version != 1) { pr_warn("Unable to handle swap header version %d\n", swap_header->info.version); return 0; } p->lowest_bit = 1; p->cluster_next = 1; p->cluster_nr = 0; maxpages = swapfile_maximum_size; last_page = swap_header->info.last_page; if (!last_page) { pr_warn("Empty swap-file\n"); return 0; } if (last_page > maxpages) { pr_warn("Truncating oversized swap area, only using %luk out of %luk\n", K(maxpages), K(last_page)); } if (maxpages > last_page) { maxpages = last_page + 1; /* p->max is an unsigned int: don't overflow it */ if ((unsigned int)maxpages == 0) maxpages = UINT_MAX; } p->highest_bit = maxpages - 1; if (!maxpages) return 0; swapfilepages = i_size_read(inode) >> PAGE_SHIFT; if (swapfilepages && maxpages > swapfilepages) { pr_warn("Swap area shorter than signature indicates\n"); return 0; } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) return 0; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; return maxpages; } #define SWAP_CLUSTER_INFO_COLS \ DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) #define SWAP_CLUSTER_SPACE_COLS \ DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) #define SWAP_CLUSTER_COLS \ max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) static int setup_swap_map_and_extents(struct swap_info_struct *p, union swap_header *swap_header, unsigned char *swap_map, struct swap_cluster_info *cluster_info, unsigned long maxpages, sector_t *span) { unsigned int j, k; unsigned int nr_good_pages; int nr_extents; unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; unsigned long i, idx; nr_good_pages = maxpages - 1; /* omit header page */ cluster_list_init(&p->free_clusters); cluster_list_init(&p->discard_clusters); for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; if (page_nr == 0 || page_nr > swap_header->info.last_page) return -EINVAL; if (page_nr < maxpages) { swap_map[page_nr] = SWAP_MAP_BAD; nr_good_pages--; /* * Haven't marked the cluster free yet, no list * operation involved */ inc_cluster_info_page(p, cluster_info, page_nr); } } /* Haven't marked the cluster free yet, no list operation involved */ for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) inc_cluster_info_page(p, cluster_info, i); if (nr_good_pages) { swap_map[0] = SWAP_MAP_BAD; /* * Not mark the cluster free yet, no list * operation involved */ inc_cluster_info_page(p, cluster_info, 0); p->max = maxpages; p->pages = nr_good_pages; nr_extents = setup_swap_extents(p, span); if (nr_extents < 0) return nr_extents; nr_good_pages = p->pages; } if (!nr_good_pages) { pr_warn("Empty swap-file\n"); return -EINVAL; } if (!cluster_info) return nr_extents; /* * Reduce false cache line sharing between cluster_info and * sharing same address space. */ for (k = 0; k < SWAP_CLUSTER_COLS; k++) { j = (k + col) % SWAP_CLUSTER_COLS; for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { idx = i * SWAP_CLUSTER_COLS + j; if (idx >= nr_clusters) continue; if (cluster_count(&cluster_info[idx])) continue; cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); cluster_list_add_tail(&p->free_clusters, cluster_info, idx); } } return nr_extents; } SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct *p; struct filename *name; struct file *swap_file = NULL; struct address_space *mapping; struct dentry *dentry; int prio; int error; union swap_header *swap_header; int nr_extents; sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; struct swap_cluster_info *cluster_info = NULL; struct page *page = NULL; struct inode *inode = NULL; bool inced_nr_rotate_swap = false; if (swap_flags & ~SWAP_FLAGS_VALID) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!swap_avail_heads) return -ENOMEM; p = alloc_swap_info(); if (IS_ERR(p)) return PTR_ERR(p); INIT_WORK(&p->discard_work, swap_discard_work); name = getname(specialfile); if (IS_ERR(name)) { error = PTR_ERR(name); name = NULL; goto bad_swap; } swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_EXCL, 0); if (IS_ERR(swap_file)) { error = PTR_ERR(swap_file); swap_file = NULL; goto bad_swap; } p->swap_file = swap_file; mapping = swap_file->f_mapping; dentry = swap_file->f_path.dentry; inode = mapping->host; error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; inode_lock(inode); if (d_unlinked(dentry) || cant_mount(dentry)) { error = -ENOENT; goto bad_swap_unlock_inode; } if (IS_SWAPFILE(inode)) { error = -EBUSY; goto bad_swap_unlock_inode; } /* * Read the swap header. */ if (!mapping->a_ops->read_folio) { error = -EINVAL; goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; goto bad_swap_unlock_inode; } if (p->bdev && bdev_stable_writes(p->bdev)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && bdev_synchronous(p->bdev)) p->flags |= SWP_SYNCHRONOUS_IO; if (p->bdev && bdev_nonrot(p->bdev)) { int cpu, i; unsigned long ci, nr_cluster; p->flags |= SWP_SOLIDSTATE; p->cluster_next_cpu = alloc_percpu(unsigned int); if (!p->cluster_next_cpu) { error = -ENOMEM; goto bad_swap_unlock_inode; } /* * select a random position to start with to help wear leveling * SSD */ for_each_possible_cpu(cpu) { per_cpu(*p->cluster_next_cpu, cpu) = get_random_u32_inclusive(1, p->highest_bit); } nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); cluster_info = kvcalloc(nr_cluster, sizeof(*cluster_info), GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) spin_lock_init(&((cluster_info + ci)->lock)); p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; cluster = per_cpu_ptr(p->percpu_cluster, cpu); for (i = 0; i < SWAP_NR_ORDERS; i++) cluster->next[i] = SWAP_NEXT_INVALID; } } else { atomic_inc(&nr_rotate_swap); inced_nr_rotate_swap = true; } error = swap_cgroup_swapon(p->type, maxpages); if (error) goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; goto bad_swap_unlock_inode; } if ((swap_flags & SWAP_FLAG_DISCARD) && p->bdev && bdev_max_discard_sectors(p->bdev)) { /* * When discard is enabled for swap with no particular * policy flagged, we set all swap discard flags here in * order to sustain backward compatibility with older * swapon(8) releases. */ p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | SWP_PAGE_DISCARD); /* * By flagging sys_swapon, a sysadmin can tell us to * either do single-time area discards only, or to just * perform discards for released swap page-clusters. * Now it's time to adjust the p->flags accordingly. */ if (swap_flags & SWAP_FLAG_DISCARD_ONCE) p->flags &= ~SWP_PAGE_DISCARD; else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) p->flags &= ~SWP_AREA_DISCARD; /* issue a swapon-time discard if it's still required */ if (p->flags & SWP_AREA_DISCARD) { int err = discard_swap(p); if (unlikely(err)) pr_err("swapon: discard_swap(%p): %d\n", p, err); } } error = init_swap_address_space(p->type, maxpages); if (error) goto bad_swap_unlock_inode; error = zswap_swapon(p->type, maxpages); if (error) goto free_swap_address_space; /* * Flush any pending IO and dirty mappings before we start using this * swap device. */ inode->i_flags |= S_SWAPFILE; error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; goto free_swap_zswap; } mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; enable_swap_info(p, prio, swap_map, cluster_info); pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n", K(p->pages), name->name, p->prio, nr_extents, K((unsigned long long)span), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", (p->flags & SWP_DISCARDABLE) ? "D" : "", (p->flags & SWP_AREA_DISCARD) ? "s" : "", (p->flags & SWP_PAGE_DISCARD) ? "c" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); error = 0; goto out; free_swap_zswap: zswap_swapoff(p->type); free_swap_address_space: exit_swap_address_space(p->type); bad_swap_unlock_inode: inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; free_percpu(p->cluster_next_cpu); p->cluster_next_cpu = NULL; inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); p->swap_file = NULL; p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); kvfree(cluster_info); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file) filp_close(swap_file, NULL); out: if (page && !IS_ERR(page)) { kunmap(page); put_page(page); } if (name) putname(name); if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache(); return error; } void si_swapinfo(struct sysinfo *val) { unsigned int type; unsigned long nr_to_be_unused = 0; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *si = swap_info[type]; if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) nr_to_be_unused += READ_ONCE(si->inuse_pages); } val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); } /* * Verify that a swap entry is valid and increment its swap map count. * * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL * - swp_entry is migration entry -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM */ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) { struct swap_info_struct *p; struct swap_cluster_info *ci; unsigned long offset; unsigned char count; unsigned char has_cache; int err; p = swp_swap_info(entry); offset = swp_offset(entry); ci = lock_cluster_or_swap_info(p, offset); count = p->swap_map[offset]; /* * swapin_readahead() doesn't check if a swap entry is valid, so the * swap entry could be SWAP_MAP_BAD. Check here with lock held. */ if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { err = -ENOENT; goto unlock_out; } has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; err = 0; if (usage == SWAP_HAS_CACHE) { /* set SWAP_HAS_CACHE if there is no cache and entry is used */ if (!has_cache && count) has_cache = SWAP_HAS_CACHE; else if (has_cache) /* someone else added cache */ err = -EEXIST; else /* no users remaining */ err = -ENOENT; } else if (count || has_cache) { if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX) count += usage; else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) err = -EINVAL; else if (swap_count_continued(p, offset, count)) count = COUNT_CONTINUED; else err = -ENOMEM; } else err = -ENOENT; /* unused swap entry */ if (!err) WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); return err; } /* * Help swapoff by noting that swap entry belongs to shmem/tmpfs * (in which case its reference count is never incremented). */ void swap_shmem_alloc(swp_entry_t entry) { __swap_duplicate(entry, SWAP_MAP_SHMEM); } /* * Increase reference count of swap entry by 1. * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required * but could not be atomically allocated. Returns 0, just as if it succeeded, * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which * might occur if a page table entry has got corrupted. */ int swap_duplicate(swp_entry_t entry) { int err = 0; while (!err && __swap_duplicate(entry, 1) == -ENOMEM) err = add_swap_count_continuation(entry, GFP_ATOMIC); return err; } /* * @entry: swap entry for which we allocate swap cache. * * Called when allocating swap cache for existing swap entry, * This can return error codes. Returns 0 at success. * -EEXIST means there is a swap cache. * Note: return code is different from swap_duplicate(). */ int swapcache_prepare(swp_entry_t entry) { return __swap_duplicate(entry, SWAP_HAS_CACHE); } void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char usage; ci = lock_cluster_or_swap_info(si, offset); usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); unlock_cluster_or_swap_info(si, ci); if (!usage) free_swap_slot(entry); } struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); } /* * out-of-line methods to avoid include hell. */ struct address_space *swapcache_mapping(struct folio *folio) { return swp_swap_info(folio->swap)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __folio_swap_cache_index(struct folio *folio) { return swap_cache_index(folio->swap); } EXPORT_SYMBOL_GPL(__folio_swap_cache_index); /* * add_swap_count_continuation - called when a swap count is duplicated * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's * page of the original vmalloc'ed swap_map, to hold the continuation count * (for that entry and for its neighbouring PAGE_SIZE swap entries). Called * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc. * * These continuation pages are seldom referenced: the common paths all work * on the original swap_map, only referring to a continuation page when the * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX. * * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL) * can be called after dropping locks. */ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; pgoff_t offset; unsigned char count; int ret = 0; /* * When debugging, it's easier to use __GFP_ZERO here; but it's better * for latency not to zero a page while GFP_ATOMIC and holding locks. */ page = alloc_page(gfp_mask | __GFP_HIGHMEM); si = get_swap_device(entry); if (!si) { /* * An acceptable race has occurred since the failing * __swap_duplicate(): the swap device may be swapoff */ goto outer; } spin_lock(&si->lock); offset = swp_offset(entry); ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { /* * The higher the swap count, the more likely it is that tasks * will race to add swap count continuation: we need to avoid * over-provisioning. */ goto out; } if (!page) { ret = -ENOMEM; goto out; } head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; spin_lock(&si->cont_lock); /* * Page allocation does not initialize the page's lru field, * but it does always reset its private field. */ if (!page_private(head)) { BUG_ON(count & COUNT_CONTINUED); INIT_LIST_HEAD(&head->lru); set_page_private(head, SWP_CONTINUED); si->flags |= SWP_CONTINUED; } list_for_each_entry(list_page, &head->lru, lru) { unsigned char *map; /* * If the previous map said no continuation, but we've found * a continuation page, free our allocation and use this one. */ if (!(count & COUNT_CONTINUED)) goto out_unlock_cont; map = kmap_local_page(list_page) + offset; count = *map; kunmap_local(map); /* * If this continuation count now has some space in it, * free our allocation and use this one. */ if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) goto out_unlock_cont; } list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out_unlock_cont: spin_unlock(&si->cont_lock); out: unlock_cluster(ci); spin_unlock(&si->lock); put_swap_device(si); outer: if (page) __free_page(page); return ret; } /* * swap_count_continued - when the original swap_map count is incremented * from SWAP_MAP_MAX, check if there is already a continuation page to carry * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster * lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) { struct page *head; struct page *page; unsigned char *map; bool ret; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head) != SWP_CONTINUED) { BUG_ON(count & COUNT_CONTINUED); return false; /* need to add count continuation */ } spin_lock(&si->cont_lock); offset &= ~PAGE_MASK; page = list_next_entry(head, lru); map = kmap_local_page(page) + offset; if (count == SWAP_MAP_MAX) /* initial increment from swap_map */ goto init_map; /* jump over SWAP_CONT_MAX checks */ if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */ /* * Think of how you add 1 to 999 */ while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } if (*map == SWAP_CONT_MAX) { kunmap_local(map); page = list_next_entry(page, lru); if (page == head) { ret = false; /* add count continuation */ goto out; } map = kmap_local_page(page) + offset; init_map: *map = 0; /* we didn't zero the page */ } *map += 1; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = COUNT_CONTINUED; kunmap_local(map); } ret = true; /* incremented */ } else { /* decrementing */ /* * Think of how you subtract 1 from 1000 */ BUG_ON(count != COUNT_CONTINUED); while (*map == COUNT_CONTINUED) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } BUG_ON(*map == 0); *map -= 1; if (*map == 0) count = 0; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = SWAP_CONT_MAX | count; count = COUNT_CONTINUED; kunmap_local(map); } ret = count == COUNT_CONTINUED; } out: spin_unlock(&si->cont_lock); return ret; } /* * free_swap_count_continuations - swapoff free all the continuation pages * appended to the swap_map, after swap_map is quiesced, before vfree'ing it. */ static void free_swap_count_continuations(struct swap_info_struct *si) { pgoff_t offset; for (offset = 0; offset < si->max; offset += PAGE_SIZE) { struct page *head; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head)) { struct page *page, *next; list_for_each_entry_safe(page, next, &head->lru, lru) { list_del(&page->lru); __free_page(page); } } } } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; int nid = folio_nid(folio); if (!(gfp & __GFP_IO)) return; if (!__has_usable_swap()) return; if (!blk_cgroup_congested()) return; /* * We've already scheduled a throttle, avoid taking the global swap * lock. */ if (current->throttle_disk) return; spin_lock(&swap_avail_lock); plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], avail_lists[nid]) { if (si->bdev) { blkcg_schedule_throttle(si->bdev->bd_disk, true); break; } } spin_unlock(&swap_avail_lock); } #endif static int __init swapfile_init(void) { int nid; swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head), GFP_KERNEL); if (!swap_avail_heads) { pr_emerg("Not enough memory for swap heads, swap is disabled\n"); return -ENOMEM; } for_each_node(nid) plist_head_init(&swap_avail_heads[nid]); swapfile_maximum_size = arch_max_swapfile_size(); #ifdef CONFIG_MIGRATION if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS)) swap_migration_ad_supported = true; #endif /* CONFIG_MIGRATION */ return 0; } subsys_initcall(swapfile_init);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 // SPDX-License-Identifier: GPL-2.0-only /* Network filesystem high-level buffered write support. * * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/pagevec.h> #include "internal.h" static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) { if (netfs_group) folio_attach_private(folio, netfs_get_group(netfs_group)); } static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) { void *priv = folio_get_private(folio); if (unlikely(priv != netfs_group)) { if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE)) folio_attach_private(folio, netfs_get_group(netfs_group)); else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) folio_detach_private(folio); } } /* * Grab a folio for writing and lock it. Attempt to allocate as large a folio * as possible to hold as much of the remaining length as possible in one go. */ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, loff_t pos, size_t part) { pgoff_t index = pos / PAGE_SIZE; fgf_t fgp_flags = FGP_WRITEBEGIN; if (mapping_large_folio_support(mapping)) fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part); return __filemap_get_folio(mapping, index, fgp_flags, mapping_gfp_mask(mapping)); } /* * Update i_size and estimate the update to i_blocks to reflect the additional * data written into the pagecache until we can find out from the server what * the values actually are. */ static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, loff_t i_size, loff_t pos, size_t copied) { blkcnt_t add; size_t gap; if (ctx->ops->update_i_size) { ctx->ops->update_i_size(inode, pos); return; } i_size_write(inode, pos); #if IS_ENABLED(CONFIG_FSCACHE) fscache_update_cookie(ctx->cache, NULL, &pos); #endif gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); if (copied > gap) { add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); inode->i_blocks = min_t(blkcnt_t, DIV_ROUND_UP(pos, SECTOR_SIZE), inode->i_blocks + add); } } /** * netfs_perform_write - Copy data into the pagecache. * @iocb: The operation parameters * @iter: The source buffer * @netfs_group: Grouping for dirty pages (eg. ceph snaps). * * Copy data into pagecache pages attached to the inode specified by @iocb. * The caller must hold appropriate inode locks. * * Dirty pages are tagged with a netfs_folio struct if they're not up to date * to indicate the range modified. Dirty pages may also be tagged with a * netfs-specific grouping such that data from an old group gets flushed before * a new one is started. */ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct netfs_group *netfs_group) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; struct netfs_inode *ctx = netfs_inode(inode); struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .for_sync = true, .nr_to_write = LONG_MAX, .range_start = iocb->ki_pos, .range_end = iocb->ki_pos + iter->count, }; struct netfs_io_request *wreq = NULL; struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) ) { wbc_attach_fdatawrite_inode(&wbc, mapping->host); ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); if (ret < 0) { wbc_detach_inode(&wbc); goto out; } wreq = netfs_begin_writethrough(iocb, iter->count); if (IS_ERR(wreq)) { wbc_detach_inode(&wbc); ret = PTR_ERR(wreq); wreq = NULL; goto out; } if (!is_sync_kiocb(iocb)) wreq->iocb = iocb; netfs_stat(&netfs_n_wh_writethrough); } else { netfs_stat(&netfs_n_wh_buffered_write); } do { struct netfs_folio *finfo; struct netfs_group *group; unsigned long long fpos; size_t flen; size_t offset; /* Offset into pagecache folio */ size_t part; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ offset = pos & (max_chunk - 1); part = min(max_chunk - offset, iov_iter_count(iter)); /* Bring in the user pages that we will copy from _first_ lest * we hit a nasty deadlock on copying from the same page as * we're writing to, without it being marked uptodate. * * Not only is this an optimisation, but it is also required to * check that the address is actually valid, when atomic * usercopies are used below. * * We rely on the page being held onto long enough by the LRU * that we can grab it below if this causes it to be read. */ ret = -EFAULT; if (unlikely(fault_in_iov_iter_readable(iter, part) == part)) break; folio = netfs_grab_folio_for_write(mapping, pos, part); if (IS_ERR(folio)) { ret = PTR_ERR(folio); break; } flen = folio_size(folio); fpos = folio_pos(folio); offset = pos - fpos; part = min_t(size_t, flen - offset, part); /* Wait for writeback to complete. The writeback engine owns * the info in folio->private and may change it until it * removes the WB mark. */ if (folio_get_private(folio) && folio_wait_writeback_killable(folio)) { ret = written ? -EINTR : -ERESTARTSYS; goto error_folio_unlock; } if (signal_pending(current)) { ret = written ? -EINTR : -ERESTARTSYS; goto error_folio_unlock; } /* Decide how we should modify a folio. We might be attempting * to do write-streaming, in which case we don't want to a * local RMW cycle if we can avoid it. If we're doing local * caching or content crypto, we award that priority over * avoiding RMW. If the file is open readably, then we also * assume that we may want to read what we wrote. */ finfo = netfs_folio_info(folio); group = netfs_folio_group(folio); if (unlikely(group != netfs_group) && group != NETFS_FOLIO_COPY_TO_CACHE) goto flush_content; if (folio_test_uptodate(folio)) { if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; netfs_set_group(folio, netfs_group); trace_netfs_folio(folio, netfs_folio_is_uptodate); goto copied; } /* If the page is above the zero-point then we assume that the * server would just return a block of zeros or a short read if * we try to read it. */ if (fpos >= ctx->zero_point) { zero_user_segment(&folio->page, 0, offset); copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; zero_user_segment(&folio->page, offset + copied, flen); __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); trace_netfs_folio(folio, netfs_modify_and_clear); goto copied; } /* See if we can write a whole folio in one go. */ if (!maybe_trouble && offset == 0 && part >= flen) { copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; if (unlikely(copied < part)) { maybe_trouble = true; iov_iter_revert(iter, copied); copied = 0; folio_unlock(folio); goto retry; } __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); trace_netfs_folio(folio, netfs_whole_folio_modify); goto copied; } /* We don't want to do a streaming write on a file that loses * caching service temporarily because the backing store got * culled and we don't really want to get a streaming write on * a file that's open for reading as ->read_folio() then has to * be able to flush it. */ if ((file->f_mode & FMODE_READ) || netfs_is_cache_enabled(ctx)) { if (finfo) { netfs_stat(&netfs_n_wh_wstream_conflict); goto flush_content; } ret = netfs_prefetch_for_write(file, folio, offset, part); if (ret < 0) { _debug("prefetch = %zd", ret); goto error_folio_unlock; } /* Note that copy-to-cache may have been set. */ copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; netfs_set_group(folio, netfs_group); trace_netfs_folio(folio, netfs_just_prefetch); goto copied; } if (!finfo) { ret = -EIO; if (WARN_ON(folio_get_private(folio))) goto error_folio_unlock; copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; if (offset == 0 && copied == flen) { __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); trace_netfs_folio(folio, netfs_streaming_filled_page); goto copied; } finfo = kzalloc(sizeof(*finfo), GFP_KERNEL); if (!finfo) { iov_iter_revert(iter, copied); ret = -ENOMEM; goto error_folio_unlock; } finfo->netfs_group = netfs_get_group(netfs_group); finfo->dirty_offset = offset; finfo->dirty_len = copied; folio_attach_private(folio, (void *)((unsigned long)finfo | NETFS_FOLIO_INFO)); trace_netfs_folio(folio, netfs_streaming_write); goto copied; } /* We can continue a streaming write only if it continues on * from the previous. If it overlaps, we must flush lest we * suffer a partial copy and disjoint dirty regions. */ if (offset == finfo->dirty_offset + finfo->dirty_len) { copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; finfo->dirty_len += copied; if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { if (finfo->netfs_group) folio_change_private(folio, finfo->netfs_group); else folio_detach_private(folio); folio_mark_uptodate(folio); kfree(finfo); trace_netfs_folio(folio, netfs_streaming_cont_filled_page); } else { trace_netfs_folio(folio, netfs_streaming_write_cont); } goto copied; } /* Incompatible write; flush the folio and try again. */ flush_content: trace_netfs_folio(folio, netfs_flush_content); folio_unlock(folio); folio_put(folio); ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1); if (ret < 0) goto error_folio_unlock; continue; copied: flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ pos += copied; i_size = i_size_read(inode); if (pos > i_size) netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { folio_mark_dirty(folio); folio_unlock(folio); } else { netfs_advance_writethrough(wreq, &wbc, folio, copied, offset + copied == flen, &writethrough); /* Folio unlocked */ } retry: folio_put(folio); folio = NULL; ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); if (unlikely(ret < 0)) break; cond_resched(); } while (iov_iter_count(iter)); out: if (likely(written)) { /* Set indication that ctime and mtime got updated in case * close is deferred. */ set_bit(NETFS_ICTX_MODIFIED_ATTR, &ctx->flags); if (unlikely(ctx->ops->post_modify)) ctx->ops->post_modify(inode); } if (unlikely(wreq)) { ret2 = netfs_end_writethrough(wreq, &wbc, writethrough); wbc_detach_inode(&wbc); if (ret2 == -EIOCBQUEUED) return ret2; if (ret == 0) ret = ret2; } iocb->ki_pos += written; _leave(" = %zd [%zd]", written, ret); return written ? written : ret; copy_failed: ret = -EFAULT; error_folio_unlock: folio_unlock(folio); folio_put(folio); goto out; } EXPORT_SYMBOL(netfs_perform_write); /** * netfs_buffered_write_iter_locked - write data to a file * @iocb: IO state structure (file, offset, etc.) * @from: iov_iter with data to write * @netfs_group: Grouping for dirty pages (eg. ceph snaps). * * This function does all the work needed for actually writing data to a * file. It does all basic checks, removes SUID from the file, updates * modification times and calls proper subroutines depending on whether we * do direct IO or a standard buffered write. * * The caller must hold appropriate locks around this function and have called * generic_write_checks() already. The caller is also responsible for doing * any necessary syncing afterwards. * * This function does *not* take care of syncing data in case of O_SYNC write. * A caller has to handle it. This is mainly due to the fact that we want to * avoid syncing under i_rwsem. * * Return: * * number of bytes written, even for truncated writes * * negative error code if no data has been written at all */ ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, struct netfs_group *netfs_group) { struct file *file = iocb->ki_filp; ssize_t ret; trace_netfs_write_iter(iocb, from); ret = file_remove_privs(file); if (ret) return ret; ret = file_update_time(file); if (ret) return ret; return netfs_perform_write(iocb, from, netfs_group); } EXPORT_SYMBOL(netfs_buffered_write_iter_locked); /** * netfs_file_write_iter - write data to a file * @iocb: IO state structure * @from: iov_iter with data to write * * Perform a write to a file, writing into the pagecache if possible and doing * an unbuffered write instead if not. * * Return: * * Negative error code if no data has been written at all of * vfs_fsync_range() failed for a synchronous write * * Number of bytes written, even for truncated writes */ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct netfs_inode *ictx = netfs_inode(inode); ssize_t ret; _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; if ((iocb->ki_flags & IOCB_DIRECT) || test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) return netfs_unbuffered_write_iter(iocb, from); ret = netfs_start_io_write(inode); if (ret < 0) return ret; ret = generic_write_checks(iocb, from); if (ret > 0) ret = netfs_buffered_write_iter_locked(iocb, from, NULL); netfs_end_io_write(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } EXPORT_SYMBOL(netfs_file_write_iter); /* * Notification that a previously read-only page is about to become writable. * Note that the caller indicates a single page of a multipage folio. */ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group) { struct netfs_group *group; struct folio *folio = page_folio(vmf->page); struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file); struct netfs_inode *ictx = netfs_inode(inode); vm_fault_t ret = VM_FAULT_RETRY; int err; _enter("%lx", folio->index); sb_start_pagefault(inode->i_sb); if (folio_lock_killable(folio) < 0) goto out; if (folio->mapping != mapping) { folio_unlock(folio); ret = VM_FAULT_NOPAGE; goto out; } if (folio_wait_writeback_killable(folio)) { ret = VM_FAULT_LOCKED; goto out; } /* Can we see a streaming write here? */ if (WARN_ON(!folio_test_uptodate(folio))) { ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED; goto out; } group = netfs_folio_group(folio); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { folio_unlock(folio); err = filemap_fdatawrite_range(mapping, folio_pos(folio), folio_pos(folio) + folio_size(folio)); switch (err) { case 0: ret = VM_FAULT_RETRY; goto out; case -ENOMEM: ret = VM_FAULT_OOM; goto out; default: ret = VM_FAULT_SIGBUS; goto out; } } if (folio_test_dirty(folio)) trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus); else trace_netfs_folio(folio, netfs_folio_trace_mkwrite); netfs_set_group(folio, netfs_group); file_update_time(file); if (ictx->ops->post_modify) ictx->ops->post_modify(inode); ret = VM_FAULT_LOCKED; out: sb_end_pagefault(inode->i_sb); return ret; } EXPORT_SYMBOL(netfs_page_mkwrite);
2 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 // SPDX-License-Identifier: GPL-2.0+ /* * LED & force feedback support for BigBen Interactive * * 0x146b:0x0902 "Bigben Interactive Bigben Game Pad" * "Kid-friendly Wired Controller" PS3OFMINIPAD SONY * sold for use with the PS3 * * Copyright (c) 2018 Hanno Zulla <kontakt@hanno.de> */ #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/leds.h> #include <linux/hid.h> #include "hid-ids.h" /* * The original descriptor for 0x146b:0x0902 * * 0x05, 0x01, // Usage Page (Generic Desktop Ctrls) * 0x09, 0x05, // Usage (Game Pad) * 0xA1, 0x01, // Collection (Application) * 0x15, 0x00, // Logical Minimum (0) * 0x25, 0x01, // Logical Maximum (1) * 0x35, 0x00, // Physical Minimum (0) * 0x45, 0x01, // Physical Maximum (1) * 0x75, 0x01, // Report Size (1) * 0x95, 0x0D, // Report Count (13) * 0x05, 0x09, // Usage Page (Button) * 0x19, 0x01, // Usage Minimum (0x01) * 0x29, 0x0D, // Usage Maximum (0x0D) * 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0x95, 0x03, // Report Count (3) * 0x81, 0x01, // Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0x05, 0x01, // Usage Page (Generic Desktop Ctrls) * 0x25, 0x07, // Logical Maximum (7) * 0x46, 0x3B, 0x01, // Physical Maximum (315) * 0x75, 0x04, // Report Size (4) * 0x95, 0x01, // Report Count (1) * 0x65, 0x14, // Unit (System: English Rotation, Length: Centimeter) * 0x09, 0x39, // Usage (Hat switch) * 0x81, 0x42, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,Null State) * 0x65, 0x00, // Unit (None) * 0x95, 0x01, // Report Count (1) * 0x81, 0x01, // Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0x26, 0xFF, 0x00, // Logical Maximum (255) * 0x46, 0xFF, 0x00, // Physical Maximum (255) * 0x09, 0x30, // Usage (X) * 0x09, 0x31, // Usage (Y) * 0x09, 0x32, // Usage (Z) * 0x09, 0x35, // Usage (Rz) * 0x75, 0x08, // Report Size (8) * 0x95, 0x04, // Report Count (4) * 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0x06, 0x00, 0xFF, // Usage Page (Vendor Defined 0xFF00) * 0x09, 0x20, // Usage (0x20) * 0x09, 0x21, // Usage (0x21) * 0x09, 0x22, // Usage (0x22) * 0x09, 0x23, // Usage (0x23) * 0x09, 0x24, // Usage (0x24) * 0x09, 0x25, // Usage (0x25) * 0x09, 0x26, // Usage (0x26) * 0x09, 0x27, // Usage (0x27) * 0x09, 0x28, // Usage (0x28) * 0x09, 0x29, // Usage (0x29) * 0x09, 0x2A, // Usage (0x2A) * 0x09, 0x2B, // Usage (0x2B) * 0x95, 0x0C, // Report Count (12) * 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0x0A, 0x21, 0x26, // Usage (0x2621) * 0x95, 0x08, // Report Count (8) * 0xB1, 0x02, // Feature (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) * 0x0A, 0x21, 0x26, // Usage (0x2621) * 0x91, 0x02, // Output (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) * 0x26, 0xFF, 0x03, // Logical Maximum (1023) * 0x46, 0xFF, 0x03, // Physical Maximum (1023) * 0x09, 0x2C, // Usage (0x2C) * 0x09, 0x2D, // Usage (0x2D) * 0x09, 0x2E, // Usage (0x2E) * 0x09, 0x2F, // Usage (0x2F) * 0x75, 0x10, // Report Size (16) * 0x95, 0x04, // Report Count (4) * 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) * 0xC0, // End Collection */ #define PID0902_RDESC_ORIG_SIZE 137 /* * The fixed descriptor for 0x146b:0x0902 * * - map buttons according to gamepad.rst * - assign right stick from Z/Rz to Rx/Ry * - map previously unused analog trigger data to Z/RZ * - simplify feature and output descriptor */ static __u8 pid0902_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x09, 0x05, /* Usage (Game Pad) */ 0xA1, 0x01, /* Collection (Application) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x35, 0x00, /* Physical Minimum (0) */ 0x45, 0x01, /* Physical Maximum (1) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x0D, /* Report Count (13) */ 0x05, 0x09, /* Usage Page (Button) */ 0x09, 0x05, /* Usage (BTN_WEST) */ 0x09, 0x01, /* Usage (BTN_SOUTH) */ 0x09, 0x02, /* Usage (BTN_EAST) */ 0x09, 0x04, /* Usage (BTN_NORTH) */ 0x09, 0x07, /* Usage (BTN_TL) */ 0x09, 0x08, /* Usage (BTN_TR) */ 0x09, 0x09, /* Usage (BTN_TL2) */ 0x09, 0x0A, /* Usage (BTN_TR2) */ 0x09, 0x0B, /* Usage (BTN_SELECT) */ 0x09, 0x0C, /* Usage (BTN_START) */ 0x09, 0x0E, /* Usage (BTN_THUMBL) */ 0x09, 0x0F, /* Usage (BTN_THUMBR) */ 0x09, 0x0D, /* Usage (BTN_MODE) */ 0x81, 0x02, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x03, /* Report Count (3) */ 0x81, 0x01, /* Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x25, 0x07, /* Logical Maximum (7) */ 0x46, 0x3B, 0x01, /* Physical Maximum (315) */ 0x75, 0x04, /* Report Size (4) */ 0x95, 0x01, /* Report Count (1) */ 0x65, 0x14, /* Unit (System: English Rotation, Length: Centimeter) */ 0x09, 0x39, /* Usage (Hat switch) */ 0x81, 0x42, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,Null State) */ 0x65, 0x00, /* Unit (None) */ 0x95, 0x01, /* Report Count (1) */ 0x81, 0x01, /* Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x26, 0xFF, 0x00, /* Logical Maximum (255) */ 0x46, 0xFF, 0x00, /* Physical Maximum (255) */ 0x09, 0x30, /* Usage (X) */ 0x09, 0x31, /* Usage (Y) */ 0x09, 0x33, /* Usage (Rx) */ 0x09, 0x34, /* Usage (Ry) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x04, /* Report Count (4) */ 0x81, 0x02, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x95, 0x0A, /* Report Count (10) */ 0x81, 0x01, /* Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x26, 0xFF, 0x00, /* Logical Maximum (255) */ 0x46, 0xFF, 0x00, /* Physical Maximum (255) */ 0x09, 0x32, /* Usage (Z) */ 0x09, 0x35, /* Usage (Rz) */ 0x95, 0x02, /* Report Count (2) */ 0x81, 0x02, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x95, 0x08, /* Report Count (8) */ 0x81, 0x01, /* Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x06, 0x00, 0xFF, /* Usage Page (Vendor Defined 0xFF00) */ 0xB1, 0x02, /* Feature (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) */ 0x0A, 0x21, 0x26, /* Usage (0x2621) */ 0x95, 0x08, /* Report Count (8) */ 0x91, 0x02, /* Output (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) */ 0x0A, 0x21, 0x26, /* Usage (0x2621) */ 0x95, 0x08, /* Report Count (8) */ 0x81, 0x02, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0xC0, /* End Collection */ }; #define NUM_LEDS 4 struct bigben_device { struct hid_device *hid; struct hid_report *report; spinlock_t lock; bool removed; u8 led_state; /* LED1 = 1 .. LED4 = 8 */ u8 right_motor_on; /* right motor off/on 0/1 */ u8 left_motor_force; /* left motor force 0-255 */ struct led_classdev *leds[NUM_LEDS]; bool work_led; bool work_ff; struct work_struct worker; }; static inline void bigben_schedule_work(struct bigben_device *bigben) { unsigned long flags; spin_lock_irqsave(&bigben->lock, flags); if (!bigben->removed) schedule_work(&bigben->worker); spin_unlock_irqrestore(&bigben->lock, flags); } static void bigben_worker(struct work_struct *work) { struct bigben_device *bigben = container_of(work, struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; bool do_work_led = false; bool do_work_ff = false; u8 *buf; u32 len; unsigned long flags; buf = hid_alloc_report_buf(bigben->report, GFP_KERNEL); if (!buf) return; len = hid_report_len(bigben->report); /* LED work */ spin_lock_irqsave(&bigben->lock, flags); if (bigben->work_led) { bigben->work_led = false; do_work_led = true; report_field->value[0] = 0x01; /* 1 = led message */ report_field->value[1] = 0x08; /* reserved value, always 8 */ report_field->value[2] = bigben->led_state; report_field->value[3] = 0x00; /* padding */ report_field->value[4] = 0x00; /* padding */ report_field->value[5] = 0x00; /* padding */ report_field->value[6] = 0x00; /* padding */ report_field->value[7] = 0x00; /* padding */ hid_output_report(bigben->report, buf); } spin_unlock_irqrestore(&bigben->lock, flags); if (do_work_led) { hid_hw_raw_request(bigben->hid, bigben->report->id, buf, len, bigben->report->type, HID_REQ_SET_REPORT); } /* FF work */ spin_lock_irqsave(&bigben->lock, flags); if (bigben->work_ff) { bigben->work_ff = false; do_work_ff = true; report_field->value[0] = 0x02; /* 2 = rumble effect message */ report_field->value[1] = 0x08; /* reserved value, always 8 */ report_field->value[2] = bigben->right_motor_on; report_field->value[3] = bigben->left_motor_force; report_field->value[4] = 0xff; /* duration 0-254 (255 = nonstop) */ report_field->value[5] = 0x00; /* padding */ report_field->value[6] = 0x00; /* padding */ report_field->value[7] = 0x00; /* padding */ hid_output_report(bigben->report, buf); } spin_unlock_irqrestore(&bigben->lock, flags); if (do_work_ff) { hid_hw_raw_request(bigben->hid, bigben->report->id, buf, len, bigben->report->type, HID_REQ_SET_REPORT); } kfree(buf); } static int hid_bigben_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct bigben_device *bigben = hid_get_drvdata(hid); u8 right_motor_on; u8 left_motor_force; unsigned long flags; if (!bigben) { hid_err(hid, "no device data\n"); return 0; } if (effect->type != FF_RUMBLE) return 0; right_motor_on = effect->u.rumble.weak_magnitude ? 1 : 0; left_motor_force = effect->u.rumble.strong_magnitude / 256; if (right_motor_on != bigben->right_motor_on || left_motor_force != bigben->left_motor_force) { spin_lock_irqsave(&bigben->lock, flags); bigben->right_motor_on = right_motor_on; bigben->left_motor_force = left_motor_force; bigben->work_ff = true; spin_unlock_irqrestore(&bigben->lock, flags); bigben_schedule_work(bigben); } return 0; } static void bigben_set_led(struct led_classdev *led, enum led_brightness value) { struct device *dev = led->dev->parent; struct hid_device *hid = to_hid_device(dev); struct bigben_device *bigben = hid_get_drvdata(hid); int n; bool work; unsigned long flags; if (!bigben) { hid_err(hid, "no device data\n"); return; } for (n = 0; n < NUM_LEDS; n++) { if (led == bigben->leds[n]) { spin_lock_irqsave(&bigben->lock, flags); if (value == LED_OFF) { work = (bigben->led_state & BIT(n)); bigben->led_state &= ~BIT(n); } else { work = !(bigben->led_state & BIT(n)); bigben->led_state |= BIT(n); } spin_unlock_irqrestore(&bigben->lock, flags); if (work) { bigben->work_led = true; bigben_schedule_work(bigben); } return; } } } static enum led_brightness bigben_get_led(struct led_classdev *led) { struct device *dev = led->dev->parent; struct hid_device *hid = to_hid_device(dev); struct bigben_device *bigben = hid_get_drvdata(hid); int n; if (!bigben) { hid_err(hid, "no device data\n"); return LED_OFF; } for (n = 0; n < NUM_LEDS; n++) { if (led == bigben->leds[n]) return (bigben->led_state & BIT(n)) ? LED_ON : LED_OFF; } return LED_OFF; } static void bigben_remove(struct hid_device *hid) { struct bigben_device *bigben = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&bigben->lock, flags); bigben->removed = true; spin_unlock_irqrestore(&bigben->lock, flags); cancel_work_sync(&bigben->worker); hid_hw_stop(hid); } static int bigben_probe(struct hid_device *hid, const struct hid_device_id *id) { struct bigben_device *bigben; struct hid_input *hidinput; struct led_classdev *led; char *name; size_t name_sz; int n, error; bigben = devm_kzalloc(&hid->dev, sizeof(*bigben), GFP_KERNEL); if (!bigben) return -ENOMEM; hid_set_drvdata(hid, bigben); bigben->hid = hid; bigben->removed = false; error = hid_parse(hid); if (error) { hid_err(hid, "parse failed\n"); return error; } error = hid_hw_start(hid, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (error) { hid_err(hid, "hw start failed\n"); return error; } bigben->report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 8); if (!bigben->report) { hid_err(hid, "no output report found\n"); error = -ENODEV; goto error_hw_stop; } if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); error = -ENODEV; goto error_hw_stop; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); set_bit(FF_RUMBLE, hidinput->input->ffbit); INIT_WORK(&bigben->worker, bigben_worker); spin_lock_init(&bigben->lock); error = input_ff_create_memless(hidinput->input, NULL, hid_bigben_play_effect); if (error) goto error_hw_stop; name_sz = strlen(dev_name(&hid->dev)) + strlen(":red:bigben#") + 1; for (n = 0; n < NUM_LEDS; n++) { led = devm_kzalloc( &hid->dev, sizeof(struct led_classdev) + name_sz, GFP_KERNEL ); if (!led) { error = -ENOMEM; goto error_hw_stop; } name = (void *)(&led[1]); snprintf(name, name_sz, "%s:red:bigben%d", dev_name(&hid->dev), n + 1 ); led->name = name; led->brightness = (n == 0) ? LED_ON : LED_OFF; led->max_brightness = 1; led->brightness_get = bigben_get_led; led->brightness_set = bigben_set_led; bigben->leds[n] = led; error = devm_led_classdev_register(&hid->dev, led); if (error) goto error_hw_stop; } /* initial state: LED1 is on, no rumble effect */ bigben->led_state = BIT(0); bigben->right_motor_on = 0; bigben->left_motor_force = 0; bigben->work_led = true; bigben->work_ff = true; bigben_schedule_work(bigben); hid_info(hid, "LED and force feedback support for BigBen gamepad\n"); return 0; error_hw_stop: hid_hw_stop(hid); return error; } static __u8 *bigben_report_fixup(struct hid_device *hid, __u8 *rdesc, unsigned int *rsize) { if (*rsize == PID0902_RDESC_ORIG_SIZE) { rdesc = pid0902_rdesc_fixed; *rsize = sizeof(pid0902_rdesc_fixed); } else hid_warn(hid, "unexpected rdesc, please submit for review\n"); return rdesc; } static const struct hid_device_id bigben_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_BIGBEN, USB_DEVICE_ID_BIGBEN_PS3OFMINIPAD) }, { } }; MODULE_DEVICE_TABLE(hid, bigben_devices); static struct hid_driver bigben_driver = { .name = "bigben", .id_table = bigben_devices, .probe = bigben_probe, .report_fixup = bigben_report_fixup, .remove = bigben_remove, }; module_hid_driver(bigben_driver); MODULE_DESCRIPTION("LED & force feedback support for BigBen Interactive"); MODULE_LICENSE("GPL");
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 // SPDX-License-Identifier: GPL-2.0-or-later // SPI init/core code // // Copyright (C) 2005 David Brownell // Copyright (C) 2008 Secret Lab Technologies Ltd. #include <linux/acpi.h> #include <linux/cache.h> #include <linux/clk/clk-conf.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <linux/export.h> #include <linux/gpio/consumer.h> #include <linux/highmem.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mod_devicetable.h> #include <linux/mutex.h> #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/percpu.h> #include <linux/platform_data/x86/apple.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/property.h> #include <linux/ptp_clock_kernel.h> #include <linux/sched/rt.h> #include <linux/slab.h> #include <linux/spi/spi.h> #include <linux/spi/spi-mem.h> #include <uapi/linux/sched/types.h> #define CREATE_TRACE_POINTS #include <trace/events/spi.h> EXPORT_TRACEPOINT_SYMBOL(spi_transfer_start); EXPORT_TRACEPOINT_SYMBOL(spi_transfer_stop); #include "internals.h" static DEFINE_IDR(spi_master_idr); static void spidev_release(struct device *dev) { struct spi_device *spi = to_spi_device(dev); spi_controller_put(spi->controller); kfree(spi->driver_override); free_percpu(spi->pcpu_statistics); kfree(spi); } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, char *buf) { const struct spi_device *spi = to_spi_device(dev); int len; len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; return sysfs_emit(buf, "%s%s\n", SPI_MODULE_PREFIX, spi->modalias); } static DEVICE_ATTR_RO(modalias); static ssize_t driver_override_store(struct device *dev, struct device_attribute *a, const char *buf, size_t count) { struct spi_device *spi = to_spi_device(dev); int ret; ret = driver_set_override(dev, &spi->driver_override, buf, count); if (ret) return ret; return count; } static ssize_t driver_override_show(struct device *dev, struct device_attribute *a, char *buf) { const struct spi_device *spi = to_spi_device(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", spi->driver_override ? : ""); device_unlock(dev); return len; } static DEVICE_ATTR_RW(driver_override); static struct spi_statistics __percpu *spi_alloc_pcpu_stats(struct device *dev) { struct spi_statistics __percpu *pcpu_stats; if (dev) pcpu_stats = devm_alloc_percpu(dev, struct spi_statistics); else pcpu_stats = alloc_percpu_gfp(struct spi_statistics, GFP_KERNEL); if (pcpu_stats) { int cpu; for_each_possible_cpu(cpu) { struct spi_statistics *stat; stat = per_cpu_ptr(pcpu_stats, cpu); u64_stats_init(&stat->syncp); } } return pcpu_stats; } static ssize_t spi_emit_pcpu_stats(struct spi_statistics __percpu *stat, char *buf, size_t offset) { u64 val = 0; int i; for_each_possible_cpu(i) { const struct spi_statistics *pcpu_stats; u64_stats_t *field; unsigned int start; u64 inc; pcpu_stats = per_cpu_ptr(stat, i); field = (void *)pcpu_stats + offset; do { start = u64_stats_fetch_begin(&pcpu_stats->syncp); inc = u64_stats_read(field); } while (u64_stats_fetch_retry(&pcpu_stats->syncp, start)); val += inc; } return sysfs_emit(buf, "%llu\n", val); } #define SPI_STATISTICS_ATTRS(field, file) \ static ssize_t spi_controller_##field##_show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct spi_controller *ctlr = container_of(dev, \ struct spi_controller, dev); \ return spi_statistics_##field##_show(ctlr->pcpu_statistics, buf); \ } \ static struct device_attribute dev_attr_spi_controller_##field = { \ .attr = { .name = file, .mode = 0444 }, \ .show = spi_controller_##field##_show, \ }; \ static ssize_t spi_device_##field##_show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct spi_device *spi = to_spi_device(dev); \ return spi_statistics_##field##_show(spi->pcpu_statistics, buf); \ } \ static struct device_attribute dev_attr_spi_device_##field = { \ .attr = { .name = file, .mode = 0444 }, \ .show = spi_device_##field##_show, \ } #define SPI_STATISTICS_SHOW_NAME(name, file, field) \ static ssize_t spi_statistics_##name##_show(struct spi_statistics __percpu *stat, \ char *buf) \ { \ return spi_emit_pcpu_stats(stat, buf, \ offsetof(struct spi_statistics, field)); \ } \ SPI_STATISTICS_ATTRS(name, file) #define SPI_STATISTICS_SHOW(field) \ SPI_STATISTICS_SHOW_NAME(field, __stringify(field), \ field) SPI_STATISTICS_SHOW(messages); SPI_STATISTICS_SHOW(transfers); SPI_STATISTICS_SHOW(errors); SPI_STATISTICS_SHOW(timedout); SPI_STATISTICS_SHOW(spi_sync); SPI_STATISTICS_SHOW(spi_sync_immediate); SPI_STATISTICS_SHOW(spi_async); SPI_STATISTICS_SHOW(bytes); SPI_STATISTICS_SHOW(bytes_rx); SPI_STATISTICS_SHOW(bytes_tx); #define SPI_STATISTICS_TRANSFER_BYTES_HISTO(index, number) \ SPI_STATISTICS_SHOW_NAME(transfer_bytes_histo##index, \ "transfer_bytes_histo_" number, \ transfer_bytes_histo[index]) SPI_STATISTICS_TRANSFER_BYTES_HISTO(0, "0-1"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(1, "2-3"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(2, "4-7"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(3, "8-15"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(4, "16-31"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(5, "32-63"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(6, "64-127"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(7, "128-255"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(8, "256-511"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(9, "512-1023"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(10, "1024-2047"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(11, "2048-4095"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(12, "4096-8191"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(13, "8192-16383"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(14, "16384-32767"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+"); SPI_STATISTICS_SHOW(transfers_split_maxsize); static struct attribute *spi_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_driver_override.attr, NULL, }; static const struct attribute_group spi_dev_group = { .attrs = spi_dev_attrs, }; static struct attribute *spi_device_statistics_attrs[] = { &dev_attr_spi_device_messages.attr, &dev_attr_spi_device_transfers.attr, &dev_attr_spi_device_errors.attr, &dev_attr_spi_device_timedout.attr, &dev_attr_spi_device_spi_sync.attr, &dev_attr_spi_device_spi_sync_immediate.attr, &dev_attr_spi_device_spi_async.attr, &dev_attr_spi_device_bytes.attr, &dev_attr_spi_device_bytes_rx.attr, &dev_attr_spi_device_bytes_tx.attr, &dev_attr_spi_device_transfer_bytes_histo0.attr, &dev_attr_spi_device_transfer_bytes_histo1.attr, &dev_attr_spi_device_transfer_bytes_histo2.attr, &dev_attr_spi_device_transfer_bytes_histo3.attr, &dev_attr_spi_device_transfer_bytes_histo4.attr, &dev_attr_spi_device_transfer_bytes_histo5.attr, &dev_attr_spi_device_transfer_bytes_histo6.attr, &dev_attr_spi_device_transfer_bytes_histo7.attr, &dev_attr_spi_device_transfer_bytes_histo8.attr, &dev_attr_spi_device_transfer_bytes_histo9.attr, &dev_attr_spi_device_transfer_bytes_histo10.attr, &dev_attr_spi_device_transfer_bytes_histo11.attr, &dev_attr_spi_device_transfer_bytes_histo12.attr, &dev_attr_spi_device_transfer_bytes_histo13.attr, &dev_attr_spi_device_transfer_bytes_histo14.attr, &dev_attr_spi_device_transfer_bytes_histo15.attr, &dev_attr_spi_device_transfer_bytes_histo16.attr, &dev_attr_spi_device_transfers_split_maxsize.attr, NULL, }; static const struct attribute_group spi_device_statistics_group = { .name = "statistics", .attrs = spi_device_statistics_attrs, }; static const struct attribute_group *spi_dev_groups[] = { &spi_dev_group, &spi_device_statistics_group, NULL, }; static struct attribute *spi_controller_statistics_attrs[] = { &dev_attr_spi_controller_messages.attr, &dev_attr_spi_controller_transfers.attr, &dev_attr_spi_controller_errors.attr, &dev_attr_spi_controller_timedout.attr, &dev_attr_spi_controller_spi_sync.attr, &dev_attr_spi_controller_spi_sync_immediate.attr, &dev_attr_spi_controller_spi_async.attr, &dev_attr_spi_controller_bytes.attr, &dev_attr_spi_controller_bytes_rx.attr, &dev_attr_spi_controller_bytes_tx.attr, &dev_attr_spi_controller_transfer_bytes_histo0.attr, &dev_attr_spi_controller_transfer_bytes_histo1.attr, &dev_attr_spi_controller_transfer_bytes_histo2.attr, &dev_attr_spi_controller_transfer_bytes_histo3.attr, &dev_attr_spi_controller_transfer_bytes_histo4.attr, &dev_attr_spi_controller_transfer_bytes_histo5.attr, &dev_attr_spi_controller_transfer_bytes_histo6.attr, &dev_attr_spi_controller_transfer_bytes_histo7.attr, &dev_attr_spi_controller_transfer_bytes_histo8.attr, &dev_attr_spi_controller_transfer_bytes_histo9.attr, &dev_attr_spi_controller_transfer_bytes_histo10.attr, &dev_attr_spi_controller_transfer_bytes_histo11.attr, &dev_attr_spi_controller_transfer_bytes_histo12.attr, &dev_attr_spi_controller_transfer_bytes_histo13.attr, &dev_attr_spi_controller_transfer_bytes_histo14.attr, &dev_attr_spi_controller_transfer_bytes_histo15.attr, &dev_attr_spi_controller_transfer_bytes_histo16.attr, &dev_attr_spi_controller_transfers_split_maxsize.attr, NULL, }; static const struct attribute_group spi_controller_statistics_group = { .name = "statistics", .attrs = spi_controller_statistics_attrs, }; static const struct attribute_group *spi_master_groups[] = { &spi_controller_statistics_group, NULL, }; static void spi_statistics_add_transfer_stats(struct spi_statistics __percpu *pcpu_stats, struct spi_transfer *xfer, struct spi_message *msg) { int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1; struct spi_statistics *stats; if (l2len < 0) l2len = 0; get_cpu(); stats = this_cpu_ptr(pcpu_stats); u64_stats_update_begin(&stats->syncp); u64_stats_inc(&stats->transfers); u64_stats_inc(&stats->transfer_bytes_histo[l2len]); u64_stats_add(&stats->bytes, xfer->len); if (spi_valid_txbuf(msg, xfer)) u64_stats_add(&stats->bytes_tx, xfer->len); if (spi_valid_rxbuf(msg, xfer)) u64_stats_add(&stats->bytes_rx, xfer->len); u64_stats_update_end(&stats->syncp); put_cpu(); } /* * modalias support makes "modprobe $MODALIAS" new-style hotplug work, * and the sysfs version makes coldplug work too. */ static const struct spi_device_id *spi_match_id(const struct spi_device_id *id, const char *name) { while (id->name[0]) { if (!strcmp(name, id->name)) return id; id++; } return NULL; } const struct spi_device_id *spi_get_device_id(const struct spi_device *sdev) { const struct spi_driver *sdrv = to_spi_driver(sdev->dev.driver); return spi_match_id(sdrv->id_table, sdev->modalias); } EXPORT_SYMBOL_GPL(spi_get_device_id); const void *spi_get_device_match_data(const struct spi_device *sdev) { const void *match; match = device_get_match_data(&sdev->dev); if (match) return match; return (const void *)spi_get_device_id(sdev)->driver_data; } EXPORT_SYMBOL_GPL(spi_get_device_match_data); static int spi_match_device(struct device *dev, const struct device_driver *drv) { const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); /* Check override first, and if set, only use the named driver */ if (spi->driver_override) return strcmp(spi->driver_override, drv->name) == 0; /* Attempt an OF style match */ if (of_driver_match_device(dev, drv)) return 1; /* Then try ACPI */ if (acpi_driver_match_device(dev, drv)) return 1; if (sdrv->id_table) return !!spi_match_id(sdrv->id_table, spi->modalias); return strcmp(spi->modalias, drv->name) == 0; } static int spi_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct spi_device *spi = to_spi_device(dev); int rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; return add_uevent_var(env, "MODALIAS=%s%s", SPI_MODULE_PREFIX, spi->modalias); } static int spi_probe(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); struct spi_device *spi = to_spi_device(dev); int ret; ret = of_clk_set_defaults(dev->of_node, false); if (ret) return ret; if (dev->of_node) { spi->irq = of_irq_get(dev->of_node, 0); if (spi->irq == -EPROBE_DEFER) return -EPROBE_DEFER; if (spi->irq < 0) spi->irq = 0; } ret = dev_pm_domain_attach(dev, true); if (ret) return ret; if (sdrv->probe) { ret = sdrv->probe(spi); if (ret) dev_pm_domain_detach(dev, true); } return ret; } static void spi_remove(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); if (sdrv->remove) sdrv->remove(to_spi_device(dev)); dev_pm_domain_detach(dev, true); } static void spi_shutdown(struct device *dev) { if (dev->driver) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); if (sdrv->shutdown) sdrv->shutdown(to_spi_device(dev)); } } const struct bus_type spi_bus_type = { .name = "spi", .dev_groups = spi_dev_groups, .match = spi_match_device, .uevent = spi_uevent, .probe = spi_probe, .remove = spi_remove, .shutdown = spi_shutdown, }; EXPORT_SYMBOL_GPL(spi_bus_type); /** * __spi_register_driver - register a SPI driver * @owner: owner module of the driver to register * @sdrv: the driver to register * Context: can sleep * * Return: zero on success, else a negative error code. */ int __spi_register_driver(struct module *owner, struct spi_driver *sdrv) { sdrv->driver.owner = owner; sdrv->driver.bus = &spi_bus_type; /* * For Really Good Reasons we use spi: modaliases not of: * modaliases for DT so module autoloading won't work if we * don't have a spi_device_id as well as a compatible string. */ if (sdrv->driver.of_match_table) { const struct of_device_id *of_id; for (of_id = sdrv->driver.of_match_table; of_id->compatible[0]; of_id++) { const char *of_name; /* Strip off any vendor prefix */ of_name = strnchr(of_id->compatible, sizeof(of_id->compatible), ','); if (of_name) of_name++; else of_name = of_id->compatible; if (sdrv->id_table) { const struct spi_device_id *spi_id; spi_id = spi_match_id(sdrv->id_table, of_name); if (spi_id) continue; } else { if (strcmp(sdrv->driver.name, of_name) == 0) continue; } pr_warn("SPI driver %s has no spi_device_id for %s\n", sdrv->driver.name, of_id->compatible); } } return driver_register(&sdrv->driver); } EXPORT_SYMBOL_GPL(__spi_register_driver); /*-------------------------------------------------------------------------*/ /* * SPI devices should normally not be created by SPI device drivers; that * would make them board-specific. Similarly with SPI controller drivers. * Device registration normally goes into like arch/.../mach.../board-YYY.c * with other readonly (flashable) information about mainboard devices. */ struct boardinfo { struct list_head list; struct spi_board_info board_info; }; static LIST_HEAD(board_list); static LIST_HEAD(spi_controller_list); /* * Used to protect add/del operation for board_info list and * spi_controller list, and their matching process also used * to protect object of type struct idr. */ static DEFINE_MUTEX(board_lock); /** * spi_alloc_device - Allocate a new SPI device * @ctlr: Controller to which device is connected * Context: can sleep * * Allows a driver to allocate and initialize a spi_device without * registering it immediately. This allows a driver to directly * fill the spi_device with device parameters before calling * spi_add_device() on it. * * Caller is responsible to call spi_add_device() on the returned * spi_device structure to add it to the SPI controller. If the caller * needs to discard the spi_device without adding it, then it should * call spi_dev_put() on it. * * Return: a pointer to the new device, or NULL. */ struct spi_device *spi_alloc_device(struct spi_controller *ctlr) { struct spi_device *spi; if (!spi_controller_get(ctlr)) return NULL; spi = kzalloc(sizeof(*spi), GFP_KERNEL); if (!spi) { spi_controller_put(ctlr); return NULL; } spi->pcpu_statistics = spi_alloc_pcpu_stats(NULL); if (!spi->pcpu_statistics) { kfree(spi); spi_controller_put(ctlr); return NULL; } spi->controller = ctlr; spi->dev.parent = &ctlr->dev; spi->dev.bus = &spi_bus_type; spi->dev.release = spidev_release; spi->mode = ctlr->buswidth_override_bits; device_initialize(&spi->dev); return spi; } EXPORT_SYMBOL_GPL(spi_alloc_device); static void spi_dev_set_name(struct spi_device *spi) { struct device *dev = &spi->dev; struct fwnode_handle *fwnode = dev_fwnode(dev); if (is_acpi_device_node(fwnode)) { dev_set_name(dev, "spi-%s", acpi_dev_name(to_acpi_device_node(fwnode))); return; } if (is_software_node(fwnode)) { dev_set_name(dev, "spi-%pfwP", fwnode); return; } dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->controller->dev), spi_get_chipselect(spi, 0)); } /* * Zero(0) is a valid physical CS value and can be located at any * logical CS in the spi->chip_select[]. If all the physical CS * are initialized to 0 then It would be difficult to differentiate * between a valid physical CS 0 & an unused logical CS whose physical * CS can be 0. As a solution to this issue initialize all the CS to -1. * Now all the unused logical CS will have -1 physical CS value & can be * ignored while performing physical CS validity checks. */ #define SPI_INVALID_CS ((s8)-1) static inline bool is_valid_cs(s8 chip_select) { return chip_select != SPI_INVALID_CS; } static inline int spi_dev_check_cs(struct device *dev, struct spi_device *spi, u8 idx, struct spi_device *new_spi, u8 new_idx) { u8 cs, cs_new; u8 idx_new; cs = spi_get_chipselect(spi, idx); for (idx_new = new_idx; idx_new < SPI_CS_CNT_MAX; idx_new++) { cs_new = spi_get_chipselect(new_spi, idx_new); if (is_valid_cs(cs) && is_valid_cs(cs_new) && cs == cs_new) { dev_err(dev, "chipselect %u already in use\n", cs_new); return -EBUSY; } } return 0; } static int spi_dev_check(struct device *dev, void *data) { struct spi_device *spi = to_spi_device(dev); struct spi_device *new_spi = data; int status, idx; if (spi->controller == new_spi->controller) { for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { status = spi_dev_check_cs(dev, spi, idx, new_spi, 0); if (status) return status; } } return 0; } static void spi_cleanup(struct spi_device *spi) { if (spi->controller->cleanup) spi->controller->cleanup(spi); } static int __spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; struct device *dev = ctlr->dev.parent; int status, idx; u8 cs; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { /* Chipselects are numbered 0..max; validate. */ cs = spi_get_chipselect(spi, idx); if (is_valid_cs(cs) && cs >= ctlr->num_chipselect) { dev_err(dev, "cs%d >= max %d\n", spi_get_chipselect(spi, idx), ctlr->num_chipselect); return -EINVAL; } } /* * Make sure that multiple logical CS doesn't map to the same physical CS. * For example, spi->chip_select[0] != spi->chip_select[1] and so on. */ if (!spi_controller_is_target(ctlr)) { for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); if (status) return status; } } /* Set the bus ID string */ spi_dev_set_name(spi); /* * We need to make sure there's no other device with this * chipselect **BEFORE** we call setup(), else we'll trash * its configuration. */ status = bus_for_each_dev(&spi_bus_type, NULL, spi, spi_dev_check); if (status) return status; /* Controller may unregister concurrently */ if (IS_ENABLED(CONFIG_SPI_DYNAMIC) && !device_is_registered(&ctlr->dev)) { return -ENODEV; } if (ctlr->cs_gpiods) { u8 cs; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { cs = spi_get_chipselect(spi, idx); if (is_valid_cs(cs)) spi_set_csgpiod(spi, idx, ctlr->cs_gpiods[cs]); } } /* * Drivers may modify this initial i/o setup, but will * normally rely on the device being setup. Devices * using SPI_CS_HIGH can't coexist well otherwise... */ status = spi_setup(spi); if (status < 0) { dev_err(dev, "can't setup %s, status %d\n", dev_name(&spi->dev), status); return status; } /* Device may be bound to an active driver when this returns */ status = device_add(&spi->dev); if (status < 0) { dev_err(dev, "can't add %s, status %d\n", dev_name(&spi->dev), status); spi_cleanup(spi); } else { dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev)); } return status; } /** * spi_add_device - Add spi_device allocated with spi_alloc_device * @spi: spi_device to register * * Companion function to spi_alloc_device. Devices allocated with * spi_alloc_device can be added onto the SPI bus with this function. * * Return: 0 on success; negative errno on failure */ int spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; int status; /* Set the bus ID string */ spi_dev_set_name(spi); mutex_lock(&ctlr->add_lock); status = __spi_add_device(spi); mutex_unlock(&ctlr->add_lock); return status; } EXPORT_SYMBOL_GPL(spi_add_device); static void spi_set_all_cs_unused(struct spi_device *spi) { u8 idx; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) spi_set_chipselect(spi, idx, SPI_INVALID_CS); } /** * spi_new_device - instantiate one new SPI device * @ctlr: Controller to which device is connected * @chip: Describes the SPI device * Context: can sleep * * On typical mainboards, this is purely internal; and it's not needed * after board init creates the hard-wired devices. Some development * platforms may not be able to use spi_register_board_info though, and * this is exported so that for example a USB or parport based adapter * driver could add devices (which it would learn about out-of-band). * * Return: the new device, or NULL. */ struct spi_device *spi_new_device(struct spi_controller *ctlr, struct spi_board_info *chip) { struct spi_device *proxy; int status; /* * NOTE: caller did any chip->bus_num checks necessary. * * Also, unless we change the return value convention to use * error-or-pointer (not NULL-or-pointer), troubleshootability * suggests syslogged diagnostics are best here (ugh). */ proxy = spi_alloc_device(ctlr); if (!proxy) return NULL; WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias)); /* Use provided chip-select for proxy device */ spi_set_all_cs_unused(proxy); spi_set_chipselect(proxy, 0, chip->chip_select); proxy->max_speed_hz = chip->max_speed_hz; proxy->mode = chip->mode; proxy->irq = chip->irq; strscpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias)); proxy->dev.platform_data = (void *) chip->platform_data; proxy->controller_data = chip->controller_data; proxy->controller_state = NULL; /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ proxy->cs_index_mask = BIT(0); if (chip->swnode) { status = device_add_software_node(&proxy->dev, chip->swnode); if (status) { dev_err(&ctlr->dev, "failed to add software node to '%s': %d\n", chip->modalias, status); goto err_dev_put; } } status = spi_add_device(proxy); if (status < 0) goto err_dev_put; return proxy; err_dev_put: device_remove_software_node(&proxy->dev); spi_dev_put(proxy); return NULL; } EXPORT_SYMBOL_GPL(spi_new_device); /** * spi_unregister_device - unregister a single SPI device * @spi: spi_device to unregister * * Start making the passed SPI device vanish. Normally this would be handled * by spi_unregister_controller(). */ void spi_unregister_device(struct spi_device *spi) { if (!spi) return; if (spi->dev.of_node) { of_node_clear_flag(spi->dev.of_node, OF_POPULATED); of_node_put(spi->dev.of_node); } if (ACPI_COMPANION(&spi->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&spi->dev)); device_remove_software_node(&spi->dev); device_del(&spi->dev); spi_cleanup(spi); put_device(&spi->dev); } EXPORT_SYMBOL_GPL(spi_unregister_device); static void spi_match_controller_to_boardinfo(struct spi_controller *ctlr, struct spi_board_info *bi) { struct spi_device *dev; if (ctlr->bus_num != bi->bus_num) return; dev = spi_new_device(ctlr, bi); if (!dev) dev_err(ctlr->dev.parent, "can't create new device for %s\n", bi->modalias); } /** * spi_register_board_info - register SPI devices for a given board * @info: array of chip descriptors * @n: how many descriptors are provided * Context: can sleep * * Board-specific early init code calls this (probably during arch_initcall) * with segments of the SPI device table. Any device nodes are created later, * after the relevant parent SPI controller (bus_num) is defined. We keep * this table of devices forever, so that reloading a controller driver will * not make Linux forget about these hard-wired devices. * * Other code can also call this, e.g. a particular add-on board might provide * SPI devices through its expansion connector, so code initializing that board * would naturally declare its SPI devices. * * The board info passed can safely be __initdata ... but be careful of * any embedded pointers (platform_data, etc), they're copied as-is. * * Return: zero on success, else a negative error code. */ int spi_register_board_info(struct spi_board_info const *info, unsigned n) { struct boardinfo *bi; int i; if (!n) return 0; bi = kcalloc(n, sizeof(*bi), GFP_KERNEL); if (!bi) return -ENOMEM; for (i = 0; i < n; i++, bi++, info++) { struct spi_controller *ctlr; memcpy(&bi->board_info, info, sizeof(*info)); mutex_lock(&board_lock); list_add_tail(&bi->list, &board_list); list_for_each_entry(ctlr, &spi_controller_list, list) spi_match_controller_to_boardinfo(ctlr, &bi->board_info); mutex_unlock(&board_lock); } return 0; } /*-------------------------------------------------------------------------*/ /* Core methods for SPI resource management */ /** * spi_res_alloc - allocate a spi resource that is life-cycle managed * during the processing of a spi_message while using * spi_transfer_one * @spi: the SPI device for which we allocate memory * @release: the release code to execute for this resource * @size: size to alloc and return * @gfp: GFP allocation flags * * Return: the pointer to the allocated data * * This may get enhanced in the future to allocate from a memory pool * of the @spi_device or @spi_controller to avoid repeated allocations. */ static void *spi_res_alloc(struct spi_device *spi, spi_res_release_t release, size_t size, gfp_t gfp) { struct spi_res *sres; sres = kzalloc(sizeof(*sres) + size, gfp); if (!sres) return NULL; INIT_LIST_HEAD(&sres->entry); sres->release = release; return sres->data; } /** * spi_res_free - free an SPI resource * @res: pointer to the custom data of a resource */ static void spi_res_free(void *res) { struct spi_res *sres = container_of(res, struct spi_res, data); if (!res) return; WARN_ON(!list_empty(&sres->entry)); kfree(sres); } /** * spi_res_add - add a spi_res to the spi_message * @message: the SPI message * @res: the spi_resource */ static void spi_res_add(struct spi_message *message, void *res) { struct spi_res *sres = container_of(res, struct spi_res, data); WARN_ON(!list_empty(&sres->entry)); list_add_tail(&sres->entry, &message->resources); } /** * spi_res_release - release all SPI resources for this message * @ctlr: the @spi_controller * @message: the @spi_message */ static void spi_res_release(struct spi_controller *ctlr, struct spi_message *message) { struct spi_res *res, *tmp; list_for_each_entry_safe_reverse(res, tmp, &message->resources, entry) { if (res->release) res->release(ctlr, message, res->data); list_del(&res->entry); kfree(res); } } /*-------------------------------------------------------------------------*/ #define spi_for_each_valid_cs(spi, idx) \ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) \ if (!(spi->cs_index_mask & BIT(idx))) {} else static inline bool spi_is_last_cs(struct spi_device *spi) { u8 idx; bool last = false; spi_for_each_valid_cs(spi, idx) { if (spi->controller->last_cs[idx] == spi_get_chipselect(spi, idx)) last = true; } return last; } static void spi_toggle_csgpiod(struct spi_device *spi, u8 idx, bool enable, bool activate) { /* * Historically ACPI has no means of the GPIO polarity and * thus the SPISerialBus() resource defines it on the per-chip * basis. In order to avoid a chain of negations, the GPIO * polarity is considered being Active High. Even for the cases * when _DSD() is involved (in the updated versions of ACPI) * the GPIO CS polarity must be defined Active High to avoid * ambiguity. That's why we use enable, that takes SPI_CS_HIGH * into account. */ if (has_acpi_companion(&spi->dev)) gpiod_set_value_cansleep(spi_get_csgpiod(spi, idx), !enable); else /* Polarity handled by GPIO library */ gpiod_set_value_cansleep(spi_get_csgpiod(spi, idx), activate); if (activate) spi_delay_exec(&spi->cs_setup, NULL); else spi_delay_exec(&spi->cs_inactive, NULL); } static void spi_set_cs(struct spi_device *spi, bool enable, bool force) { bool activate = enable; u8 idx; /* * Avoid calling into the driver (or doing delays) if the chip select * isn't actually changing from the last time this was called. */ if (!force && ((enable && spi->controller->last_cs_index_mask == spi->cs_index_mask && spi_is_last_cs(spi)) || (!enable && spi->controller->last_cs_index_mask == spi->cs_index_mask && !spi_is_last_cs(spi))) && (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH))) return; trace_spi_set_cs(spi, activate); spi->controller->last_cs_index_mask = spi->cs_index_mask; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) spi->controller->last_cs[idx] = enable ? spi_get_chipselect(spi, 0) : SPI_INVALID_CS; spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH; if (spi->mode & SPI_CS_HIGH) enable = !enable; /* * Handle chip select delays for GPIO based CS or controllers without * programmable chip select timing. */ if ((spi_is_csgpiod(spi) || !spi->controller->set_cs_timing) && !activate) spi_delay_exec(&spi->cs_hold, NULL); if (spi_is_csgpiod(spi)) { if (!(spi->mode & SPI_NO_CS)) { spi_for_each_valid_cs(spi, idx) { if (spi_get_csgpiod(spi, idx)) spi_toggle_csgpiod(spi, idx, enable, activate); } } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_CONTROLLER_GPIO_SS) && spi->controller->set_cs) spi->controller->set_cs(spi, !enable); } else if (spi->controller->set_cs) { spi->controller->set_cs(spi, !enable); } if (spi_is_csgpiod(spi) || !spi->controller->set_cs_timing) { if (activate) spi_delay_exec(&spi->cs_setup, NULL); else spi_delay_exec(&spi->cs_inactive, NULL); } } #ifdef CONFIG_HAS_DMA static int spi_map_buf_attrs(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, void *buf, size_t len, enum dma_data_direction dir, unsigned long attrs) { const bool vmalloced_buf = is_vmalloc_addr(buf); unsigned int max_seg_size = dma_get_max_seg_size(dev); #ifdef CONFIG_HIGHMEM const bool kmap_buf = ((unsigned long)buf >= PKMAP_BASE && (unsigned long)buf < (PKMAP_BASE + (LAST_PKMAP * PAGE_SIZE))); #else const bool kmap_buf = false; #endif int desc_len; int sgs; struct page *vm_page; struct scatterlist *sg; void *sg_buf; size_t min; int i, ret; if (vmalloced_buf || kmap_buf) { desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE); sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len); } else if (virt_addr_valid(buf)) { desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len); sgs = DIV_ROUND_UP(len, desc_len); } else { return -EINVAL; } ret = sg_alloc_table(sgt, sgs, GFP_KERNEL); if (ret != 0) return ret; sg = &sgt->sgl[0]; for (i = 0; i < sgs; i++) { if (vmalloced_buf || kmap_buf) { /* * Next scatterlist entry size is the minimum between * the desc_len and the remaining buffer length that * fits in a page. */ min = min_t(size_t, desc_len, min_t(size_t, len, PAGE_SIZE - offset_in_page(buf))); if (vmalloced_buf) vm_page = vmalloc_to_page(buf); else vm_page = kmap_to_page(buf); if (!vm_page) { sg_free_table(sgt); return -ENOMEM; } sg_set_page(sg, vm_page, min, offset_in_page(buf)); } else { min = min_t(size_t, len, desc_len); sg_buf = buf; sg_set_buf(sg, sg_buf, min); } buf += min; len -= min; sg = sg_next(sg); } ret = dma_map_sgtable(dev, sgt, dir, attrs); if (ret < 0) { sg_free_table(sgt); return ret; } return 0; } int spi_map_buf(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, void *buf, size_t len, enum dma_data_direction dir) { return spi_map_buf_attrs(ctlr, dev, sgt, buf, len, dir, 0); } static void spi_unmap_buf_attrs(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { dma_unmap_sgtable(dev, sgt, dir, attrs); sg_free_table(sgt); sgt->orig_nents = 0; sgt->nents = 0; } void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { spi_unmap_buf_attrs(ctlr, dev, sgt, dir, 0); } static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct device *tx_dev, *rx_dev; struct spi_transfer *xfer; int ret; if (!ctlr->can_dma) return 0; if (ctlr->dma_tx) tx_dev = ctlr->dma_tx->device->dev; else if (ctlr->dma_map_dev) tx_dev = ctlr->dma_map_dev; else tx_dev = ctlr->dev.parent; if (ctlr->dma_rx) rx_dev = ctlr->dma_rx->device->dev; else if (ctlr->dma_map_dev) rx_dev = ctlr->dma_map_dev; else rx_dev = ctlr->dev.parent; ret = -ENOMSG; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* The sync is done before each transfer. */ unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; if (!ctlr->can_dma(ctlr, msg->spi, xfer)) continue; if (xfer->tx_buf != NULL) { ret = spi_map_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, (void *)xfer->tx_buf, xfer->len, DMA_TO_DEVICE, attrs); if (ret != 0) return ret; xfer->tx_sg_mapped = true; } if (xfer->rx_buf != NULL) { ret = spi_map_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE, attrs); if (ret != 0) { spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE, attrs); return ret; } xfer->rx_sg_mapped = true; } } /* No transfer has been mapped, bail out with success */ if (ret) return 0; ctlr->cur_rx_dma_dev = rx_dev; ctlr->cur_tx_dma_dev = tx_dev; return 0; } static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; struct spi_transfer *xfer; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* The sync has already been done after each transfer. */ unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; if (xfer->rx_sg_mapped) spi_unmap_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE, attrs); xfer->rx_sg_mapped = false; if (xfer->tx_sg_mapped) spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE, attrs); xfer->tx_sg_mapped = false; } return 0; } static void spi_dma_sync_for_device(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; if (xfer->tx_sg_mapped) dma_sync_sgtable_for_device(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); if (xfer->rx_sg_mapped) dma_sync_sgtable_for_device(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); } static void spi_dma_sync_for_cpu(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; if (xfer->rx_sg_mapped) dma_sync_sgtable_for_cpu(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); if (xfer->tx_sg_mapped) dma_sync_sgtable_for_cpu(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); } #else /* !CONFIG_HAS_DMA */ static inline int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { return 0; } static inline int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { return 0; } static void spi_dma_sync_for_device(struct spi_controller *ctrl, struct spi_transfer *xfer) { } static void spi_dma_sync_for_cpu(struct spi_controller *ctrl, struct spi_transfer *xfer) { } #endif /* !CONFIG_HAS_DMA */ static inline int spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* * Restore the original value of tx_buf or rx_buf if they are * NULL. */ if (xfer->tx_buf == ctlr->dummy_tx) xfer->tx_buf = NULL; if (xfer->rx_buf == ctlr->dummy_rx) xfer->rx_buf = NULL; } return __spi_unmap_msg(ctlr, msg); } static int spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; void *tmp; unsigned int max_tx, max_rx; if ((ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX)) && !(msg->spi->mode & SPI_3WIRE)) { max_tx = 0; max_rx = 0; list_for_each_entry(xfer, &msg->transfers, transfer_list) { if ((ctlr->flags & SPI_CONTROLLER_MUST_TX) && !xfer->tx_buf) max_tx = max(xfer->len, max_tx); if ((ctlr->flags & SPI_CONTROLLER_MUST_RX) && !xfer->rx_buf) max_rx = max(xfer->len, max_rx); } if (max_tx) { tmp = krealloc(ctlr->dummy_tx, max_tx, GFP_KERNEL | GFP_DMA | __GFP_ZERO); if (!tmp) return -ENOMEM; ctlr->dummy_tx = tmp; } if (max_rx) { tmp = krealloc(ctlr->dummy_rx, max_rx, GFP_KERNEL | GFP_DMA); if (!tmp) return -ENOMEM; ctlr->dummy_rx = tmp; } if (max_tx || max_rx) { list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (!xfer->len) continue; if (!xfer->tx_buf) xfer->tx_buf = ctlr->dummy_tx; if (!xfer->rx_buf) xfer->rx_buf = ctlr->dummy_rx; } } } return __spi_map_msg(ctlr, msg); } static int spi_transfer_wait(struct spi_controller *ctlr, struct spi_message *msg, struct spi_transfer *xfer) { struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; u32 speed_hz = xfer->speed_hz; unsigned long long ms; if (spi_controller_is_slave(ctlr)) { if (wait_for_completion_interruptible(&ctlr->xfer_completion)) { dev_dbg(&msg->spi->dev, "SPI transfer interrupted\n"); return -EINTR; } } else { if (!speed_hz) speed_hz = 100000; /* * For each byte we wait for 8 cycles of the SPI clock. * Since speed is defined in Hz and we want milliseconds, * use respective multiplier, but before the division, * otherwise we may get 0 for short transfers. */ ms = 8LL * MSEC_PER_SEC * xfer->len; do_div(ms, speed_hz); /* * Increase it twice and add 200 ms tolerance, use * predefined maximum in case of overflow. */ ms += ms + 200; if (ms > UINT_MAX) ms = UINT_MAX; ms = wait_for_completion_timeout(&ctlr->xfer_completion, msecs_to_jiffies(ms)); if (ms == 0) { SPI_STATISTICS_INCREMENT_FIELD(statm, timedout); SPI_STATISTICS_INCREMENT_FIELD(stats, timedout); dev_err(&msg->spi->dev, "SPI transfer timed out\n"); return -ETIMEDOUT; } if (xfer->error & SPI_TRANS_FAIL_IO) return -EIO; } return 0; } static void _spi_transfer_delay_ns(u32 ns) { if (!ns) return; if (ns <= NSEC_PER_USEC) { ndelay(ns); } else { u32 us = DIV_ROUND_UP(ns, NSEC_PER_USEC); if (us <= 10) udelay(us); else usleep_range(us, us + DIV_ROUND_UP(us, 10)); } } int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer) { u32 delay = _delay->value; u32 unit = _delay->unit; u32 hz; if (!delay) return 0; switch (unit) { case SPI_DELAY_UNIT_USECS: delay *= NSEC_PER_USEC; break; case SPI_DELAY_UNIT_NSECS: /* Nothing to do here */ break; case SPI_DELAY_UNIT_SCK: /* Clock cycles need to be obtained from spi_transfer */ if (!xfer) return -EINVAL; /* * If there is unknown effective speed, approximate it * by underestimating with half of the requested Hz. */ hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2; if (!hz) return -EINVAL; /* Convert delay to nanoseconds */ delay *= DIV_ROUND_UP(NSEC_PER_SEC, hz); break; default: return -EINVAL; } return delay; } EXPORT_SYMBOL_GPL(spi_delay_to_ns); int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer) { int delay; might_sleep(); if (!_delay) return -EINVAL; delay = spi_delay_to_ns(_delay, xfer); if (delay < 0) return delay; _spi_transfer_delay_ns(delay); return 0; } EXPORT_SYMBOL_GPL(spi_delay_exec); static void _spi_transfer_cs_change_delay(struct spi_message *msg, struct spi_transfer *xfer) { u32 default_delay_ns = 10 * NSEC_PER_USEC; u32 delay = xfer->cs_change_delay.value; u32 unit = xfer->cs_change_delay.unit; int ret; /* Return early on "fast" mode - for everything but USECS */ if (!delay) { if (unit == SPI_DELAY_UNIT_USECS) _spi_transfer_delay_ns(default_delay_ns); return; } ret = spi_delay_exec(&xfer->cs_change_delay, xfer); if (ret) { dev_err_once(&msg->spi->dev, "Use of unsupported delay unit %i, using default of %luus\n", unit, default_delay_ns / NSEC_PER_USEC); _spi_transfer_delay_ns(default_delay_ns); } } void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer) { _spi_transfer_cs_change_delay(msg, xfer); } EXPORT_SYMBOL_GPL(spi_transfer_cs_change_delay_exec); /* * spi_transfer_one_message - Default implementation of transfer_one_message() * * This is a standard implementation of transfer_one_message() for * drivers which implement a transfer_one() operation. It provides * standard handling of delays and chip select management. */ static int spi_transfer_one_message(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; bool keep_cs = false; int ret = 0; struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; xfer = list_first_entry(&msg->transfers, struct spi_transfer, transfer_list); spi_set_cs(msg->spi, !xfer->cs_off, false); SPI_STATISTICS_INCREMENT_FIELD(statm, messages); SPI_STATISTICS_INCREMENT_FIELD(stats, messages); list_for_each_entry(xfer, &msg->transfers, transfer_list) { trace_spi_transfer_start(msg, xfer); spi_statistics_add_transfer_stats(statm, xfer, msg); spi_statistics_add_transfer_stats(stats, xfer, msg); if (!ctlr->ptp_sts_supported) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } if ((xfer->tx_buf || xfer->rx_buf) && xfer->len) { reinit_completion(&ctlr->xfer_completion); fallback_pio: spi_dma_sync_for_device(ctlr, xfer); ret = ctlr->transfer_one(ctlr, msg->spi, xfer); if (ret < 0) { spi_dma_sync_for_cpu(ctlr, xfer); if ((xfer->tx_sg_mapped || xfer->rx_sg_mapped) && (xfer->error & SPI_TRANS_FAIL_NO_START)) { __spi_unmap_msg(ctlr, msg); ctlr->fallback = true; xfer->error &= ~SPI_TRANS_FAIL_NO_START; goto fallback_pio; } SPI_STATISTICS_INCREMENT_FIELD(statm, errors); SPI_STATISTICS_INCREMENT_FIELD(stats, errors); dev_err(&msg->spi->dev, "SPI transfer failed: %d\n", ret); goto out; } if (ret > 0) { ret = spi_transfer_wait(ctlr, msg, xfer); if (ret < 0) msg->status = ret; } spi_dma_sync_for_cpu(ctlr, xfer); } else { if (xfer->len) dev_err(&msg->spi->dev, "Bufferless transfer has length %u\n", xfer->len); } if (!ctlr->ptp_sts_supported) { ptp_read_system_postts(xfer->ptp_sts); xfer->ptp_sts_word_post = xfer->len; } trace_spi_transfer_stop(msg, xfer); if (msg->status != -EINPROGRESS) goto out; spi_transfer_delay_exec(xfer); if (xfer->cs_change) { if (list_is_last(&xfer->transfer_list, &msg->transfers)) { keep_cs = true; } else { if (!xfer->cs_off) spi_set_cs(msg->spi, false, false); _spi_transfer_cs_change_delay(msg, xfer); if (!list_next_entry(xfer, transfer_list)->cs_off) spi_set_cs(msg->spi, true, false); } } else if (!list_is_last(&xfer->transfer_list, &msg->transfers) && xfer->cs_off != list_next_entry(xfer, transfer_list)->cs_off) { spi_set_cs(msg->spi, xfer->cs_off, false); } msg->actual_length += xfer->len; } out: if (ret != 0 || !keep_cs) spi_set_cs(msg->spi, false, false); if (msg->status == -EINPROGRESS) msg->status = ret; if (msg->status && ctlr->handle_err) ctlr->handle_err(ctlr, msg); spi_finalize_current_message(ctlr); return ret; } /** * spi_finalize_current_transfer - report completion of a transfer * @ctlr: the controller reporting completion * * Called by SPI drivers using the core transfer_one_message() * implementation to notify it that the current interrupt driven * transfer has finished and the next one may be scheduled. */ void spi_finalize_current_transfer(struct spi_controller *ctlr) { complete(&ctlr->xfer_completion); } EXPORT_SYMBOL_GPL(spi_finalize_current_transfer); static void spi_idle_runtime_pm(struct spi_controller *ctlr) { if (ctlr->auto_runtime_pm) { pm_runtime_mark_last_busy(ctlr->dev.parent); pm_runtime_put_autosuspend(ctlr->dev.parent); } } static int __spi_pump_transfer_message(struct spi_controller *ctlr, struct spi_message *msg, bool was_busy) { struct spi_transfer *xfer; int ret; if (!was_busy && ctlr->auto_runtime_pm) { ret = pm_runtime_get_sync(ctlr->dev.parent); if (ret < 0) { pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } } if (!was_busy) trace_spi_controller_busy(ctlr); if (!was_busy && ctlr->prepare_transfer_hardware) { ret = ctlr->prepare_transfer_hardware(ctlr); if (ret) { dev_err(&ctlr->dev, "failed to prepare transfer hardware: %d\n", ret); if (ctlr->auto_runtime_pm) pm_runtime_put(ctlr->dev.parent); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } } trace_spi_message_start(msg); if (ctlr->prepare_message) { ret = ctlr->prepare_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, "failed to prepare message: %d\n", ret); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } msg->prepared = true; } ret = spi_map_msg(ctlr, msg); if (ret) { msg->status = ret; spi_finalize_current_message(ctlr); return ret; } if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { list_for_each_entry(xfer, &msg->transfers, transfer_list) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } } /* * Drivers implementation of transfer_one_message() must arrange for * spi_finalize_current_message() to get called. Most drivers will do * this in the calling context, but some don't. For those cases, a * completion is used to guarantee that this function does not return * until spi_finalize_current_message() is done accessing * ctlr->cur_msg. * Use of the following two flags enable to opportunistically skip the * use of the completion since its use involves expensive spin locks. * In case of a race with the context that calls * spi_finalize_current_message() the completion will always be used, * due to strict ordering of these flags using barriers. */ WRITE_ONCE(ctlr->cur_msg_incomplete, true); WRITE_ONCE(ctlr->cur_msg_need_completion, false); reinit_completion(&ctlr->cur_msg_completion); smp_wmb(); /* Make these available to spi_finalize_current_message() */ ret = ctlr->transfer_one_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, "failed to transfer one message from queue\n"); return ret; } WRITE_ONCE(ctlr->cur_msg_need_completion, true); smp_mb(); /* See spi_finalize_current_message()... */ if (READ_ONCE(ctlr->cur_msg_incomplete)) wait_for_completion(&ctlr->cur_msg_completion); return 0; } /** * __spi_pump_messages - function which processes SPI message queue * @ctlr: controller to process queue for * @in_kthread: true if we are in the context of the message pump thread * * This function checks if there is any SPI message in the queue that * needs processing and if so call out to the driver to initialize hardware * and transfer each message. * * Note that it is called both from the kthread itself and also from * inside spi_sync(); the queue extraction handling at the top of the * function should deal with this safely. */ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) { struct spi_message *msg; bool was_busy = false; unsigned long flags; int ret; /* Take the I/O mutex */ mutex_lock(&ctlr->io_mutex); /* Lock queue */ spin_lock_irqsave(&ctlr->queue_lock, flags); /* Make sure we are not already running a message */ if (ctlr->cur_msg) goto out_unlock; /* Check if the queue is idle */ if (list_empty(&ctlr->queue) || !ctlr->running) { if (!ctlr->busy) goto out_unlock; /* Defer any non-atomic teardown to the thread */ if (!in_kthread) { if (!ctlr->dummy_rx && !ctlr->dummy_tx && !ctlr->unprepare_transfer_hardware) { spi_idle_runtime_pm(ctlr); ctlr->busy = false; ctlr->queue_empty = true; trace_spi_controller_idle(ctlr); } else { kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); } goto out_unlock; } ctlr->busy = false; spin_unlock_irqrestore(&ctlr->queue_lock, flags); kfree(ctlr->dummy_rx); ctlr->dummy_rx = NULL; kfree(ctlr->dummy_tx); ctlr->dummy_tx = NULL; if (ctlr->unprepare_transfer_hardware && ctlr->unprepare_transfer_hardware(ctlr)) dev_err(&ctlr->dev, "failed to unprepare transfer hardware\n"); spi_idle_runtime_pm(ctlr); trace_spi_controller_idle(ctlr); spin_lock_irqsave(&ctlr->queue_lock, flags); ctlr->queue_empty = true; goto out_unlock; } /* Extract head of queue */ msg = list_first_entry(&ctlr->queue, struct spi_message, queue); ctlr->cur_msg = msg; list_del_init(&msg->queue); if (ctlr->busy) was_busy = true; else ctlr->busy = true; spin_unlock_irqrestore(&ctlr->queue_lock, flags); ret = __spi_pump_transfer_message(ctlr, msg, was_busy); kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); ctlr->cur_msg = NULL; ctlr->fallback = false; mutex_unlock(&ctlr->io_mutex); /* Prod the scheduler in case transfer_one() was busy waiting */ if (!ret) cond_resched(); return; out_unlock: spin_unlock_irqrestore(&ctlr->queue_lock, flags); mutex_unlock(&ctlr->io_mutex); } /** * spi_pump_messages - kthread work function which processes spi message queue * @work: pointer to kthread work struct contained in the controller struct */ static void spi_pump_messages(struct kthread_work *work) { struct spi_controller *ctlr = container_of(work, struct spi_controller, pump_messages); __spi_pump_messages(ctlr, true); } /** * spi_take_timestamp_pre - helper to collect the beginning of the TX timestamp * @ctlr: Pointer to the spi_controller structure of the driver * @xfer: Pointer to the transfer being timestamped * @progress: How many words (not bytes) have been transferred so far * @irqs_off: If true, will disable IRQs and preemption for the duration of the * transfer, for less jitter in time measurement. Only compatible * with PIO drivers. If true, must follow up with * spi_take_timestamp_post or otherwise system will crash. * WARNING: for fully predictable results, the CPU frequency must * also be under control (governor). * * This is a helper for drivers to collect the beginning of the TX timestamp * for the requested byte from the SPI transfer. The frequency with which this * function must be called (once per word, once for the whole transfer, once * per batch of words etc) is arbitrary as long as the @tx buffer offset is * greater than or equal to the requested byte at the time of the call. The * timestamp is only taken once, at the first such call. It is assumed that * the driver advances its @tx buffer pointer monotonically. */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off) { if (!xfer->ptp_sts) return; if (xfer->timestamped) return; if (progress > xfer->ptp_sts_word_pre) return; /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_pre = progress; if (irqs_off) { local_irq_save(ctlr->irq_flags); preempt_disable(); } ptp_read_system_prets(xfer->ptp_sts); } EXPORT_SYMBOL_GPL(spi_take_timestamp_pre); /** * spi_take_timestamp_post - helper to collect the end of the TX timestamp * @ctlr: Pointer to the spi_controller structure of the driver * @xfer: Pointer to the transfer being timestamped * @progress: How many words (not bytes) have been transferred so far * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU. * * This is a helper for drivers to collect the end of the TX timestamp for * the requested byte from the SPI transfer. Can be called with an arbitrary * frequency: only the first call where @tx exceeds or is equal to the * requested word will be timestamped. */ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off) { if (!xfer->ptp_sts) return; if (xfer->timestamped) return; if (progress < xfer->ptp_sts_word_post) return; ptp_read_system_postts(xfer->ptp_sts); if (irqs_off) { local_irq_restore(ctlr->irq_flags); preempt_enable(); } /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_post = progress; xfer->timestamped = 1; } EXPORT_SYMBOL_GPL(spi_take_timestamp_post); /** * spi_set_thread_rt - set the controller to pump at realtime priority * @ctlr: controller to boost priority of * * This can be called because the controller requested realtime priority * (by setting the ->rt value before calling spi_register_controller()) or * because a device on the bus said that its transfers needed realtime * priority. * * NOTE: at the moment if any device on a bus says it needs realtime then * the thread will be at realtime priority for all transfers on that * controller. If this eventually becomes a problem we may see if we can * find a way to boost the priority only temporarily during relevant * transfers. */ static void spi_set_thread_rt(struct spi_controller *ctlr) { dev_info(&ctlr->dev, "will run message pump with realtime priority\n"); sched_set_fifo(ctlr->kworker->task); } static int spi_init_queue(struct spi_controller *ctlr) { ctlr->running = false; ctlr->busy = false; ctlr->queue_empty = true; ctlr->kworker = kthread_create_worker(0, dev_name(&ctlr->dev)); if (IS_ERR(ctlr->kworker)) { dev_err(&ctlr->dev, "failed to create message pump kworker\n"); return PTR_ERR(ctlr->kworker); } kthread_init_work(&ctlr->pump_messages, spi_pump_messages); /* * Controller config will indicate if this controller should run the * message pump with high (realtime) priority to reduce the transfer * latency on the bus by minimising the delay between a transfer * request and the scheduling of the message pump thread. Without this * setting the message pump thread will remain at default priority. */ if (ctlr->rt) spi_set_thread_rt(ctlr); return 0; } /** * spi_get_next_queued_message() - called by driver to check for queued * messages * @ctlr: the controller to check for queued messages * * If there are more messages in the queue, the next message is returned from * this call. * * Return: the next message in the queue, else NULL if the queue is empty. */ struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr) { struct spi_message *next; unsigned long flags; /* Get a pointer to the next message, if any */ spin_lock_irqsave(&ctlr->queue_lock, flags); next = list_first_entry_or_null(&ctlr->queue, struct spi_message, queue); spin_unlock_irqrestore(&ctlr->queue_lock, flags); return next; } EXPORT_SYMBOL_GPL(spi_get_next_queued_message); /* * __spi_unoptimize_message - shared implementation of spi_unoptimize_message() * and spi_maybe_unoptimize_message() * @msg: the message to unoptimize * * Peripheral drivers should use spi_unoptimize_message() and callers inside * core should use spi_maybe_unoptimize_message() rather than calling this * function directly. * * It is not valid to call this on a message that is not currently optimized. */ static void __spi_unoptimize_message(struct spi_message *msg) { struct spi_controller *ctlr = msg->spi->controller; if (ctlr->unoptimize_message) ctlr->unoptimize_message(msg); spi_res_release(ctlr, msg); msg->optimized = false; msg->opt_state = NULL; } /* * spi_maybe_unoptimize_message - unoptimize msg not managed by a peripheral * @msg: the message to unoptimize * * This function is used to unoptimize a message if and only if it was * optimized by the core (via spi_maybe_optimize_message()). */ static void spi_maybe_unoptimize_message(struct spi_message *msg) { if (!msg->pre_optimized && msg->optimized && !msg->spi->controller->defer_optimize_message) __spi_unoptimize_message(msg); } /** * spi_finalize_current_message() - the current message is complete * @ctlr: the controller to return the message to * * Called by the driver to notify the core that the message in the front of the * queue is complete and can be removed from the queue. */ void spi_finalize_current_message(struct spi_controller *ctlr) { struct spi_transfer *xfer; struct spi_message *mesg; int ret; mesg = ctlr->cur_msg; if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { list_for_each_entry(xfer, &mesg->transfers, transfer_list) { ptp_read_system_postts(xfer->ptp_sts); xfer->ptp_sts_word_post = xfer->len; } } if (unlikely(ctlr->ptp_sts_supported)) list_for_each_entry(xfer, &mesg->transfers, transfer_list) WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped); spi_unmap_msg(ctlr, mesg); if (mesg->prepared && ctlr->unprepare_message) { ret = ctlr->unprepare_message(ctlr, mesg); if (ret) { dev_err(&ctlr->dev, "failed to unprepare message: %d\n", ret); } } mesg->prepared = false; spi_maybe_unoptimize_message(mesg); WRITE_ONCE(ctlr->cur_msg_incomplete, false); smp_mb(); /* See __spi_pump_transfer_message()... */ if (READ_ONCE(ctlr->cur_msg_need_completion)) complete(&ctlr->cur_msg_completion); trace_spi_message_done(mesg); mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); static int spi_start_queue(struct spi_controller *ctlr) { unsigned long flags; spin_lock_irqsave(&ctlr->queue_lock, flags); if (ctlr->running || ctlr->busy) { spin_unlock_irqrestore(&ctlr->queue_lock, flags); return -EBUSY; } ctlr->running = true; ctlr->cur_msg = NULL; spin_unlock_irqrestore(&ctlr->queue_lock, flags); kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); return 0; } static int spi_stop_queue(struct spi_controller *ctlr) { unsigned int limit = 500; unsigned long flags; /* * This is a bit lame, but is optimized for the common execution path. * A wait_queue on the ctlr->busy could be used, but then the common * execution path (pump_messages) would be required to call wake_up or * friends on every SPI message. Do this instead. */ do { spin_lock_irqsave(&ctlr->queue_lock, flags); if (list_empty(&ctlr->queue) && !ctlr->busy) { ctlr->running = false; spin_unlock_irqrestore(&ctlr->queue_lock, flags); return 0; } spin_unlock_irqrestore(&ctlr->queue_lock, flags); usleep_range(10000, 11000); } while (--limit); return -EBUSY; } static int spi_destroy_queue(struct spi_controller *ctlr) { int ret; ret = spi_stop_queue(ctlr); /* * kthread_flush_worker will block until all work is done. * If the reason that stop_queue timed out is that the work will never * finish, then it does no good to call flush/stop thread, so * return anyway. */ if (ret) { dev_err(&ctlr->dev, "problem destroying queue\n"); return ret; } kthread_destroy_worker(ctlr->kworker); return 0; } static int __spi_queued_transfer(struct spi_device *spi, struct spi_message *msg, bool need_pump) { struct spi_controller *ctlr = spi->controller; unsigned long flags; spin_lock_irqsave(&ctlr->queue_lock, flags); if (!ctlr->running) { spin_unlock_irqrestore(&ctlr->queue_lock, flags); return -ESHUTDOWN; } msg->actual_length = 0; msg->status = -EINPROGRESS; list_add_tail(&msg->queue, &ctlr->queue); ctlr->queue_empty = false; if (!ctlr->busy && need_pump) kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); return 0; } /** * spi_queued_transfer - transfer function for queued transfers * @spi: SPI device which is requesting transfer * @msg: SPI message which is to handled is queued to driver queue * * Return: zero on success, else a negative error code. */ static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg) { return __spi_queued_transfer(spi, msg, true); } static int spi_controller_initialize_queue(struct spi_controller *ctlr) { int ret; ctlr->transfer = spi_queued_transfer; if (!ctlr->transfer_one_message) ctlr->transfer_one_message = spi_transfer_one_message; /* Initialize and start queue */ ret = spi_init_queue(ctlr); if (ret) { dev_err(&ctlr->dev, "problem initializing queue\n"); goto err_init_queue; } ctlr->queued = true; ret = spi_start_queue(ctlr); if (ret) { dev_err(&ctlr->dev, "problem starting queue\n"); goto err_start_queue; } return 0; err_start_queue: spi_destroy_queue(ctlr); err_init_queue: return ret; } /** * spi_flush_queue - Send all pending messages in the queue from the callers' * context * @ctlr: controller to process queue for * * This should be used when one wants to ensure all pending messages have been * sent before doing something. Is used by the spi-mem code to make sure SPI * memory operations do not preempt regular SPI transfers that have been queued * before the spi-mem operation. */ void spi_flush_queue(struct spi_controller *ctlr) { if (ctlr->transfer == spi_queued_transfer) __spi_pump_messages(ctlr, false); } /*-------------------------------------------------------------------------*/ #if defined(CONFIG_OF) static void of_spi_parse_dt_cs_delay(struct device_node *nc, struct spi_delay *delay, const char *prop) { u32 value; if (!of_property_read_u32(nc, prop, &value)) { if (value > U16_MAX) { delay->value = DIV_ROUND_UP(value, 1000); delay->unit = SPI_DELAY_UNIT_USECS; } else { delay->value = value; delay->unit = SPI_DELAY_UNIT_NSECS; } } } static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, struct device_node *nc) { u32 value, cs[SPI_CS_CNT_MAX]; int rc, idx; /* Mode (clock phase/polarity/etc.) */ if (of_property_read_bool(nc, "spi-cpha")) spi->mode |= SPI_CPHA; if (of_property_read_bool(nc, "spi-cpol")) spi->mode |= SPI_CPOL; if (of_property_read_bool(nc, "spi-3wire")) spi->mode |= SPI_3WIRE; if (of_property_read_bool(nc, "spi-lsb-first")) spi->mode |= SPI_LSB_FIRST; if (of_property_read_bool(nc, "spi-cs-high")) spi->mode |= SPI_CS_HIGH; /* Device DUAL/QUAD mode */ if (!of_property_read_u32(nc, "spi-tx-bus-width", &value)) { switch (value) { case 0: spi->mode |= SPI_NO_TX; break; case 1: break; case 2: spi->mode |= SPI_TX_DUAL; break; case 4: spi->mode |= SPI_TX_QUAD; break; case 8: spi->mode |= SPI_TX_OCTAL; break; default: dev_warn(&ctlr->dev, "spi-tx-bus-width %d not supported\n", value); break; } } if (!of_property_read_u32(nc, "spi-rx-bus-width", &value)) { switch (value) { case 0: spi->mode |= SPI_NO_RX; break; case 1: break; case 2: spi->mode |= SPI_RX_DUAL; break; case 4: spi->mode |= SPI_RX_QUAD; break; case 8: spi->mode |= SPI_RX_OCTAL; break; default: dev_warn(&ctlr->dev, "spi-rx-bus-width %d not supported\n", value); break; } } if (spi_controller_is_slave(ctlr)) { if (!of_node_name_eq(nc, "slave")) { dev_err(&ctlr->dev, "%pOF is not called 'slave'\n", nc); return -EINVAL; } return 0; } if (ctlr->num_chipselect > SPI_CS_CNT_MAX) { dev_err(&ctlr->dev, "No. of CS is more than max. no. of supported CS\n"); return -EINVAL; } spi_set_all_cs_unused(spi); /* Device address */ rc = of_property_read_variable_u32_array(nc, "reg", &cs[0], 1, SPI_CS_CNT_MAX); if (rc < 0) { dev_err(&ctlr->dev, "%pOF has no valid 'reg' property (%d)\n", nc, rc); return rc; } if (rc > ctlr->num_chipselect) { dev_err(&ctlr->dev, "%pOF has number of CS > ctlr->num_chipselect (%d)\n", nc, rc); return rc; } if ((of_property_read_bool(nc, "parallel-memories")) && (!(ctlr->flags & SPI_CONTROLLER_MULTI_CS))) { dev_err(&ctlr->dev, "SPI controller doesn't support multi CS\n"); return -EINVAL; } for (idx = 0; idx < rc; idx++) spi_set_chipselect(spi, idx, cs[idx]); /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ spi->cs_index_mask = BIT(0); /* Device speed */ if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; /* Device CS delays */ of_spi_parse_dt_cs_delay(nc, &spi->cs_setup, "spi-cs-setup-delay-ns"); of_spi_parse_dt_cs_delay(nc, &spi->cs_hold, "spi-cs-hold-delay-ns"); of_spi_parse_dt_cs_delay(nc, &spi->cs_inactive, "spi-cs-inactive-delay-ns"); return 0; } static struct spi_device * of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc) { struct spi_device *spi; int rc; /* Alloc an spi_device */ spi = spi_alloc_device(ctlr); if (!spi) { dev_err(&ctlr->dev, "spi_device alloc error for %pOF\n", nc); rc = -ENOMEM; goto err_out; } /* Select device driver */ rc = of_alias_from_compatible(nc, spi->modalias, sizeof(spi->modalias)); if (rc < 0) { dev_err(&ctlr->dev, "cannot find modalias for %pOF\n", nc); goto err_out; } rc = of_spi_parse_dt(ctlr, spi, nc); if (rc) goto err_out; /* Store a pointer to the node in the device structure */ of_node_get(nc); device_set_node(&spi->dev, of_fwnode_handle(nc)); /* Register the new device */ rc = spi_add_device(spi); if (rc) { dev_err(&ctlr->dev, "spi_device register error %pOF\n", nc); goto err_of_node_put; } return spi; err_of_node_put: of_node_put(nc); err_out: spi_dev_put(spi); return ERR_PTR(rc); } /** * of_register_spi_devices() - Register child devices onto the SPI bus * @ctlr: Pointer to spi_controller device * * Registers an spi_device for each child node of controller node which * represents a valid SPI slave. */ static void of_register_spi_devices(struct spi_controller *ctlr) { struct spi_device *spi; struct device_node *nc; for_each_available_child_of_node(ctlr->dev.of_node, nc) { if (of_node_test_and_set_flag(nc, OF_POPULATED)) continue; spi = of_register_spi_device(ctlr, nc); if (IS_ERR(spi)) { dev_warn(&ctlr->dev, "Failed to create SPI device for %pOF\n", nc); of_node_clear_flag(nc, OF_POPULATED); } } } #else static void of_register_spi_devices(struct spi_controller *ctlr) { } #endif /** * spi_new_ancillary_device() - Register ancillary SPI device * @spi: Pointer to the main SPI device registering the ancillary device * @chip_select: Chip Select of the ancillary device * * Register an ancillary SPI device; for example some chips have a chip-select * for normal device usage and another one for setup/firmware upload. * * This may only be called from main SPI device's probe routine. * * Return: 0 on success; negative errno on failure */ struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select) { struct spi_controller *ctlr = spi->controller; struct spi_device *ancillary; int rc; /* Alloc an spi_device */ ancillary = spi_alloc_device(ctlr); if (!ancillary) { rc = -ENOMEM; goto err_out; } strscpy(ancillary->modalias, "dummy", sizeof(ancillary->modalias)); /* Use provided chip-select for ancillary device */ spi_set_all_cs_unused(ancillary); spi_set_chipselect(ancillary, 0, chip_select); /* Take over SPI mode/speed from SPI main device */ ancillary->max_speed_hz = spi->max_speed_hz; ancillary->mode = spi->mode; /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ ancillary->cs_index_mask = BIT(0); WARN_ON(!mutex_is_locked(&ctlr->add_lock)); /* Register the new device */ rc = __spi_add_device(ancillary); if (rc) { dev_err(&spi->dev, "failed to register ancillary device\n"); goto err_out; } return ancillary; err_out: spi_dev_put(ancillary); return ERR_PTR(rc); } EXPORT_SYMBOL_GPL(spi_new_ancillary_device); #ifdef CONFIG_ACPI struct acpi_spi_lookup { struct spi_controller *ctlr; u32 max_speed_hz; u32 mode; int irq; u8 bits_per_word; u8 chip_select; int n; int index; }; static int acpi_spi_count(struct acpi_resource *ares, void *data) { struct acpi_resource_spi_serialbus *sb; int *count = data; if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) return 1; sb = &ares->data.spi_serial_bus; if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI) return 1; *count = *count + 1; return 1; } /** * acpi_spi_count_resources - Count the number of SpiSerialBus resources * @adev: ACPI device * * Return: the number of SpiSerialBus resources in the ACPI-device's * resource-list; or a negative error code. */ int acpi_spi_count_resources(struct acpi_device *adev) { LIST_HEAD(r); int count = 0; int ret; ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count); if (ret < 0) return ret; acpi_dev_free_resource_list(&r); return count; } EXPORT_SYMBOL_GPL(acpi_spi_count_resources); static void acpi_spi_parse_apple_properties(struct acpi_device *dev, struct acpi_spi_lookup *lookup) { const union acpi_object *obj; if (!x86_apple_machine) return; if (!acpi_dev_get_property(dev, "spiSclkPeriod", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length >= 4) lookup->max_speed_hz = NSEC_PER_SEC / *(u32 *)obj->buffer.pointer; if (!acpi_dev_get_property(dev, "spiWordSize", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8) lookup->bits_per_word = *(u64 *)obj->buffer.pointer; if (!acpi_dev_get_property(dev, "spiBitOrder", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && !*(u64 *)obj->buffer.pointer) lookup->mode |= SPI_LSB_FIRST; if (!acpi_dev_get_property(dev, "spiSPO", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && *(u64 *)obj->buffer.pointer) lookup->mode |= SPI_CPOL; if (!acpi_dev_get_property(dev, "spiSPH", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && *(u64 *)obj->buffer.pointer) lookup->mode |= SPI_CPHA; } static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) { struct acpi_spi_lookup *lookup = data; struct spi_controller *ctlr = lookup->ctlr; if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { struct acpi_resource_spi_serialbus *sb; acpi_handle parent_handle; acpi_status status; sb = &ares->data.spi_serial_bus; if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) { if (lookup->index != -1 && lookup->n++ != lookup->index) return 1; status = acpi_get_handle(NULL, sb->resource_source.string_ptr, &parent_handle); if (ACPI_FAILURE(status)) return -ENODEV; if (ctlr) { if (!device_match_acpi_handle(ctlr->dev.parent, parent_handle)) return -ENODEV; } else { struct acpi_device *adev; adev = acpi_fetch_acpi_dev(parent_handle); if (!adev) return -ENODEV; ctlr = acpi_spi_find_controller_by_adev(adev); if (!ctlr) return -EPROBE_DEFER; lookup->ctlr = ctlr; } /* * ACPI DeviceSelection numbering is handled by the * host controller driver in Windows and can vary * from driver to driver. In Linux we always expect * 0 .. max - 1 so we need to ask the driver to * translate between the two schemes. */ if (ctlr->fw_translate_cs) { int cs = ctlr->fw_translate_cs(ctlr, sb->device_selection); if (cs < 0) return cs; lookup->chip_select = cs; } else { lookup->chip_select = sb->device_selection; } lookup->max_speed_hz = sb->connection_speed; lookup->bits_per_word = sb->data_bit_length; if (sb->clock_phase == ACPI_SPI_SECOND_PHASE) lookup->mode |= SPI_CPHA; if (sb->clock_polarity == ACPI_SPI_START_HIGH) lookup->mode |= SPI_CPOL; if (sb->device_polarity == ACPI_SPI_ACTIVE_HIGH) lookup->mode |= SPI_CS_HIGH; } } else if (lookup->irq < 0) { struct resource r; if (acpi_dev_resource_interrupt(ares, 0, &r)) lookup->irq = r.start; } /* Always tell the ACPI core to skip this resource */ return 1; } /** * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information * @ctlr: controller to which the spi device belongs * @adev: ACPI Device for the spi device * @index: Index of the spi resource inside the ACPI Node * * This should be used to allocate a new SPI device from and ACPI Device node. * The caller is responsible for calling spi_add_device to register the SPI device. * * If ctlr is set to NULL, the Controller for the SPI device will be looked up * using the resource. * If index is set to -1, index is not used. * Note: If index is -1, ctlr must be set. * * Return: a pointer to the new device, or ERR_PTR on error. */ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index) { acpi_handle parent_handle = NULL; struct list_head resource_list; struct acpi_spi_lookup lookup = {}; struct spi_device *spi; int ret; if (!ctlr && index == -1) return ERR_PTR(-EINVAL); lookup.ctlr = ctlr; lookup.irq = -1; lookup.index = index; lookup.n = 0; INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, acpi_spi_add_resource, &lookup); acpi_dev_free_resource_list(&resource_list); if (ret < 0) /* Found SPI in _CRS but it points to another controller */ return ERR_PTR(ret); if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && device_match_acpi_handle(lookup.ctlr->dev.parent, parent_handle)) { /* Apple does not use _CRS but nested devices for SPI slaves */ acpi_spi_parse_apple_properties(adev, &lookup); } if (!lookup.max_speed_hz) return ERR_PTR(-ENODEV); spi = spi_alloc_device(lookup.ctlr); if (!spi) { dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); return ERR_PTR(-ENOMEM); } spi_set_all_cs_unused(spi); spi_set_chipselect(spi, 0, lookup.chip_select); ACPI_COMPANION_SET(&spi->dev, adev); spi->max_speed_hz = lookup.max_speed_hz; spi->mode |= lookup.mode; spi->irq = lookup.irq; spi->bits_per_word = lookup.bits_per_word; /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ spi->cs_index_mask = BIT(0); return spi; } EXPORT_SYMBOL_GPL(acpi_spi_device_alloc); static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, struct acpi_device *adev) { struct spi_device *spi; if (acpi_bus_get_status(adev) || !adev->status.present || acpi_device_enumerated(adev)) return AE_OK; spi = acpi_spi_device_alloc(ctlr, adev, -1); if (IS_ERR(spi)) { if (PTR_ERR(spi) == -ENOMEM) return AE_NO_MEMORY; else return AE_OK; } acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); if (spi->irq < 0) spi->irq = acpi_dev_gpio_irq_get(adev, 0); acpi_device_set_enumerated(adev); adev->power.flags.ignore_parent = true; if (spi_add_device(spi)) { adev->power.flags.ignore_parent = false; dev_err(&ctlr->dev, "failed to add SPI device %s from ACPI\n", dev_name(&adev->dev)); spi_dev_put(spi); } return AE_OK; } static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level, void *data, void **return_value) { struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct spi_controller *ctlr = data; if (!adev) return AE_OK; return acpi_register_spi_device(ctlr, adev); } #define SPI_ACPI_ENUMERATE_MAX_DEPTH 32 static void acpi_register_spi_devices(struct spi_controller *ctlr) { acpi_status status; acpi_handle handle; handle = ACPI_HANDLE(ctlr->dev.parent); if (!handle) return; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, SPI_ACPI_ENUMERATE_MAX_DEPTH, acpi_spi_add_device, NULL, ctlr, NULL); if (ACPI_FAILURE(status)) dev_warn(&ctlr->dev, "failed to enumerate SPI slaves\n"); } #else static inline void acpi_register_spi_devices(struct spi_controller *ctlr) {} #endif /* CONFIG_ACPI */ static void spi_controller_release(struct device *dev) { struct spi_controller *ctlr; ctlr = container_of(dev, struct spi_controller, dev); kfree(ctlr); } static struct class spi_master_class = { .name = "spi_master", .dev_release = spi_controller_release, .dev_groups = spi_master_groups, }; #ifdef CONFIG_SPI_SLAVE /** * spi_slave_abort - abort the ongoing transfer request on an SPI slave * controller * @spi: device used for the current transfer */ int spi_slave_abort(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (spi_controller_is_slave(ctlr) && ctlr->slave_abort) return ctlr->slave_abort(ctlr); return -ENOTSUPP; } EXPORT_SYMBOL_GPL(spi_slave_abort); int spi_target_abort(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (spi_controller_is_target(ctlr) && ctlr->target_abort) return ctlr->target_abort(ctlr); return -ENOTSUPP; } EXPORT_SYMBOL_GPL(spi_target_abort); static ssize_t slave_show(struct device *dev, struct device_attribute *attr, char *buf) { struct spi_controller *ctlr = container_of(dev, struct spi_controller, dev); struct device *child; child = device_find_any_child(&ctlr->dev); return sysfs_emit(buf, "%s\n", child ? to_spi_device(child)->modalias : NULL); } static ssize_t slave_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct spi_controller *ctlr = container_of(dev, struct spi_controller, dev); struct spi_device *spi; struct device *child; char name[32]; int rc; rc = sscanf(buf, "%31s", name); if (rc != 1 || !name[0]) return -EINVAL; child = device_find_any_child(&ctlr->dev); if (child) { /* Remove registered slave */ device_unregister(child); put_device(child); } if (strcmp(name, "(null)")) { /* Register new slave */ spi = spi_alloc_device(ctlr); if (!spi) return -ENOMEM; strscpy(spi->modalias, name, sizeof(spi->modalias)); rc = spi_add_device(spi); if (rc) { spi_dev_put(spi); return rc; } } return count; } static DEVICE_ATTR_RW(slave); static struct attribute *spi_slave_attrs[] = { &dev_attr_slave.attr, NULL, }; static const struct attribute_group spi_slave_group = { .attrs = spi_slave_attrs, }; static const struct attribute_group *spi_slave_groups[] = { &spi_controller_statistics_group, &spi_slave_group, NULL, }; static struct class spi_slave_class = { .name = "spi_slave", .dev_release = spi_controller_release, .dev_groups = spi_slave_groups, }; #else extern struct class spi_slave_class; /* dummy */ #endif /** * __spi_alloc_controller - allocate an SPI master or slave controller * @dev: the controller, possibly using the platform_bus * @size: how much zeroed driver-private data to allocate; the pointer to this * memory is in the driver_data field of the returned device, accessible * with spi_controller_get_devdata(); the memory is cacheline aligned; * drivers granting DMA access to portions of their private data need to * round up @size using ALIGN(size, dma_get_cache_alignment()). * @slave: flag indicating whether to allocate an SPI master (false) or SPI * slave (true) controller * Context: can sleep * * This call is used only by SPI controller drivers, which are the * only ones directly touching chip registers. It's how they allocate * an spi_controller structure, prior to calling spi_register_controller(). * * This must be called from context that can sleep. * * The caller is responsible for assigning the bus number and initializing the * controller's methods before calling spi_register_controller(); and (after * errors adding the device) calling spi_controller_put() to prevent a memory * leak. * * Return: the SPI controller structure on success, else NULL. */ struct spi_controller *__spi_alloc_controller(struct device *dev, unsigned int size, bool slave) { struct spi_controller *ctlr; size_t ctlr_size = ALIGN(sizeof(*ctlr), dma_get_cache_alignment()); if (!dev) return NULL; ctlr = kzalloc(size + ctlr_size, GFP_KERNEL); if (!ctlr) return NULL; device_initialize(&ctlr->dev); INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); spin_lock_init(&ctlr->bus_lock_spinlock); mutex_init(&ctlr->bus_lock_mutex); mutex_init(&ctlr->io_mutex); mutex_init(&ctlr->add_lock); ctlr->bus_num = -1; ctlr->num_chipselect = 1; ctlr->slave = slave; if (IS_ENABLED(CONFIG_SPI_SLAVE) && slave) ctlr->dev.class = &spi_slave_class; else ctlr->dev.class = &spi_master_class; ctlr->dev.parent = dev; pm_suspend_ignore_children(&ctlr->dev, true); spi_controller_set_devdata(ctlr, (void *)ctlr + ctlr_size); return ctlr; } EXPORT_SYMBOL_GPL(__spi_alloc_controller); static void devm_spi_release_controller(struct device *dev, void *ctlr) { spi_controller_put(*(struct spi_controller **)ctlr); } /** * __devm_spi_alloc_controller - resource-managed __spi_alloc_controller() * @dev: physical device of SPI controller * @size: how much zeroed driver-private data to allocate * @slave: whether to allocate an SPI master (false) or SPI slave (true) * Context: can sleep * * Allocate an SPI controller and automatically release a reference on it * when @dev is unbound from its driver. Drivers are thus relieved from * having to call spi_controller_put(). * * The arguments to this function are identical to __spi_alloc_controller(). * * Return: the SPI controller structure on success, else NULL. */ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave) { struct spi_controller **ptr, *ctlr; ptr = devres_alloc(devm_spi_release_controller, sizeof(*ptr), GFP_KERNEL); if (!ptr) return NULL; ctlr = __spi_alloc_controller(dev, size, slave); if (ctlr) { ctlr->devm_allocated = true; *ptr = ctlr; devres_add(dev, ptr); } else { devres_free(ptr); } return ctlr; } EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller); /** * spi_get_gpio_descs() - grab chip select GPIOs for the master * @ctlr: The SPI master to grab GPIO descriptors for */ static int spi_get_gpio_descs(struct spi_controller *ctlr) { int nb, i; struct gpio_desc **cs; struct device *dev = &ctlr->dev; unsigned long native_cs_mask = 0; unsigned int num_cs_gpios = 0; nb = gpiod_count(dev, "cs"); if (nb < 0) { /* No GPIOs at all is fine, else return the error */ if (nb == -ENOENT) return 0; return nb; } ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect); cs = devm_kcalloc(dev, ctlr->num_chipselect, sizeof(*cs), GFP_KERNEL); if (!cs) return -ENOMEM; ctlr->cs_gpiods = cs; for (i = 0; i < nb; i++) { /* * Most chipselects are active low, the inverted * semantics are handled by special quirks in gpiolib, * so initializing them GPIOD_OUT_LOW here means * "unasserted", in most cases this will drive the physical * line high. */ cs[i] = devm_gpiod_get_index_optional(dev, "cs", i, GPIOD_OUT_LOW); if (IS_ERR(cs[i])) return PTR_ERR(cs[i]); if (cs[i]) { /* * If we find a CS GPIO, name it after the device and * chip select line. */ char *gpioname; gpioname = devm_kasprintf(dev, GFP_KERNEL, "%s CS%d", dev_name(dev), i); if (!gpioname) return -ENOMEM; gpiod_set_consumer_name(cs[i], gpioname); num_cs_gpios++; continue; } if (ctlr->max_native_cs && i >= ctlr->max_native_cs) { dev_err(dev, "Invalid native chip select %d\n", i); return -EINVAL; } native_cs_mask |= BIT(i); } ctlr->unused_native_cs = ffs(~native_cs_mask) - 1; if ((ctlr->flags & SPI_CONTROLLER_GPIO_SS) && num_cs_gpios && ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) { dev_err(dev, "No unused native chip select available\n"); return -EINVAL; } return 0; } static int spi_controller_check_ops(struct spi_controller *ctlr) { /* * The controller may implement only the high-level SPI-memory like * operations if it does not support regular SPI transfers, and this is * valid use case. * If ->mem_ops or ->mem_ops->exec_op is NULL, we request that at least * one of the ->transfer_xxx() method be implemented. */ if (!ctlr->mem_ops || !ctlr->mem_ops->exec_op) { if (!ctlr->transfer && !ctlr->transfer_one && !ctlr->transfer_one_message) { return -EINVAL; } } return 0; } /* Allocate dynamic bus number using Linux idr */ static int spi_controller_id_alloc(struct spi_controller *ctlr, int start, int end) { int id; mutex_lock(&board_lock); id = idr_alloc(&spi_master_idr, ctlr, start, end, GFP_KERNEL); mutex_unlock(&board_lock); if (WARN(id < 0, "couldn't get idr")) return id == -ENOSPC ? -EBUSY : id; ctlr->bus_num = id; return 0; } /** * spi_register_controller - register SPI master or slave controller * @ctlr: initialized master, originally from spi_alloc_master() or * spi_alloc_slave() * Context: can sleep * * SPI controllers connect to their drivers using some non-SPI bus, * such as the platform bus. The final stage of probe() in that code * includes calling spi_register_controller() to hook up to this SPI bus glue. * * SPI controllers use board specific (often SOC specific) bus numbers, * and board-specific addressing for SPI devices combines those numbers * with chip select numbers. Since SPI does not directly support dynamic * device identification, boards need configuration tables telling which * chip is at which address. * * This must be called from context that can sleep. It returns zero on * success, else a negative error code (dropping the controller's refcount). * After a successful return, the caller is responsible for calling * spi_unregister_controller(). * * Return: zero on success, else a negative error code. */ int spi_register_controller(struct spi_controller *ctlr) { struct device *dev = ctlr->dev.parent; struct boardinfo *bi; int first_dynamic; int status; int idx; if (!dev) return -ENODEV; /* * Make sure all necessary hooks are implemented before registering * the SPI controller. */ status = spi_controller_check_ops(ctlr); if (status) return status; if (ctlr->bus_num < 0) ctlr->bus_num = of_alias_get_id(ctlr->dev.of_node, "spi"); if (ctlr->bus_num >= 0) { /* Devices with a fixed bus num must check-in with the num */ status = spi_controller_id_alloc(ctlr, ctlr->bus_num, ctlr->bus_num + 1); if (status) return status; } if (ctlr->bus_num < 0) { first_dynamic = of_alias_get_highest_id("spi"); if (first_dynamic < 0) first_dynamic = 0; else first_dynamic++; status = spi_controller_id_alloc(ctlr, first_dynamic, 0); if (status) return status; } ctlr->bus_lock_flag = 0; init_completion(&ctlr->xfer_completion); init_completion(&ctlr->cur_msg_completion); if (!ctlr->max_dma_len) ctlr->max_dma_len = INT_MAX; /* * Register the device, then userspace will see it. * Registration fails if the bus ID is in use. */ dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num); if (!spi_controller_is_slave(ctlr) && ctlr->use_gpio_descriptors) { status = spi_get_gpio_descs(ctlr); if (status) goto free_bus_id; /* * A controller using GPIO descriptors always * supports SPI_CS_HIGH if need be. */ ctlr->mode_bits |= SPI_CS_HIGH; } /* * Even if it's just one always-selected device, there must * be at least one chipselect. */ if (!ctlr->num_chipselect) { status = -EINVAL; goto free_bus_id; } /* Setting last_cs to SPI_INVALID_CS means no chip selected */ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) ctlr->last_cs[idx] = SPI_INVALID_CS; status = device_add(&ctlr->dev); if (status < 0) goto free_bus_id; dev_dbg(dev, "registered %s %s\n", spi_controller_is_slave(ctlr) ? "slave" : "master", dev_name(&ctlr->dev)); /* * If we're using a queued driver, start the queue. Note that we don't * need the queueing logic if the driver is only supporting high-level * memory operations. */ if (ctlr->transfer) { dev_info(dev, "controller is unqueued, this is deprecated\n"); } else if (ctlr->transfer_one || ctlr->transfer_one_message) { status = spi_controller_initialize_queue(ctlr); if (status) { device_del(&ctlr->dev); goto free_bus_id; } } /* Add statistics */ ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev); if (!ctlr->pcpu_statistics) { dev_err(dev, "Error allocating per-cpu statistics\n"); status = -ENOMEM; goto destroy_queue; } mutex_lock(&board_lock); list_add_tail(&ctlr->list, &spi_controller_list); list_for_each_entry(bi, &board_list, list) spi_match_controller_to_boardinfo(ctlr, &bi->board_info); mutex_unlock(&board_lock); /* Register devices from the device tree and ACPI */ of_register_spi_devices(ctlr); acpi_register_spi_devices(ctlr); return status; destroy_queue: spi_destroy_queue(ctlr); free_bus_id: mutex_lock(&board_lock); idr_remove(&spi_master_idr, ctlr->bus_num); mutex_unlock(&board_lock); return status; } EXPORT_SYMBOL_GPL(spi_register_controller); static void devm_spi_unregister(struct device *dev, void *res) { spi_unregister_controller(*(struct spi_controller **)res); } /** * devm_spi_register_controller - register managed SPI master or slave * controller * @dev: device managing SPI controller * @ctlr: initialized controller, originally from spi_alloc_master() or * spi_alloc_slave() * Context: can sleep * * Register a SPI device as with spi_register_controller() which will * automatically be unregistered and freed. * * Return: zero on success, else a negative error code. */ int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr) { struct spi_controller **ptr; int ret; ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; ret = spi_register_controller(ctlr); if (!ret) { *ptr = ctlr; devres_add(dev, ptr); } else { devres_free(ptr); } return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller); static int __unregister(struct device *dev, void *null) { spi_unregister_device(to_spi_device(dev)); return 0; } /** * spi_unregister_controller - unregister SPI master or slave controller * @ctlr: the controller being unregistered * Context: can sleep * * This call is used only by SPI controller drivers, which are the * only ones directly touching chip registers. * * This must be called from context that can sleep. * * Note that this function also drops a reference to the controller. */ void spi_unregister_controller(struct spi_controller *ctlr) { struct spi_controller *found; int id = ctlr->bus_num; /* Prevent addition of new devices, unregister existing ones */ if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) mutex_lock(&ctlr->add_lock); device_for_each_child(&ctlr->dev, NULL, __unregister); /* First make sure that this controller was ever added */ mutex_lock(&board_lock); found = idr_find(&spi_master_idr, id); mutex_unlock(&board_lock); if (ctlr->queued) { if (spi_destroy_queue(ctlr)) dev_err(&ctlr->dev, "queue remove failed\n"); } mutex_lock(&board_lock); list_del(&ctlr->list); mutex_unlock(&board_lock); device_del(&ctlr->dev); /* Free bus id */ mutex_lock(&board_lock); if (found == ctlr) idr_remove(&spi_master_idr, id); mutex_unlock(&board_lock); if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) mutex_unlock(&ctlr->add_lock); /* * Release the last reference on the controller if its driver * has not yet been converted to devm_spi_alloc_master/slave(). */ if (!ctlr->devm_allocated) put_device(&ctlr->dev); } EXPORT_SYMBOL_GPL(spi_unregister_controller); static inline int __spi_check_suspended(const struct spi_controller *ctlr) { return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? -ESHUTDOWN : 0; } static inline void __spi_mark_suspended(struct spi_controller *ctlr) { mutex_lock(&ctlr->bus_lock_mutex); ctlr->flags |= SPI_CONTROLLER_SUSPENDED; mutex_unlock(&ctlr->bus_lock_mutex); } static inline void __spi_mark_resumed(struct spi_controller *ctlr) { mutex_lock(&ctlr->bus_lock_mutex); ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; mutex_unlock(&ctlr->bus_lock_mutex); } int spi_controller_suspend(struct spi_controller *ctlr) { int ret = 0; /* Basically no-ops for non-queued controllers */ if (ctlr->queued) { ret = spi_stop_queue(ctlr); if (ret) dev_err(&ctlr->dev, "queue stop failed\n"); } __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { int ret = 0; __spi_mark_resumed(ctlr); if (ctlr->queued) { ret = spi_start_queue(ctlr); if (ret) dev_err(&ctlr->dev, "queue restart failed\n"); } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); /*-------------------------------------------------------------------------*/ /* Core methods for spi_message alterations */ static void __spi_replace_transfers_release(struct spi_controller *ctlr, struct spi_message *msg, void *res) { struct spi_replaced_transfers *rxfer = res; size_t i; /* Call extra callback if requested */ if (rxfer->release) rxfer->release(ctlr, msg, res); /* Insert replaced transfers back into the message */ list_splice(&rxfer->replaced_transfers, rxfer->replaced_after); /* Remove the formerly inserted entries */ for (i = 0; i < rxfer->inserted; i++) list_del(&rxfer->inserted_transfers[i].transfer_list); } /** * spi_replace_transfers - replace transfers with several transfers * and register change with spi_message.resources * @msg: the spi_message we work upon * @xfer_first: the first spi_transfer we want to replace * @remove: number of transfers to remove * @insert: the number of transfers we want to insert instead * @release: extra release code necessary in some circumstances * @extradatasize: extra data to allocate (with alignment guarantees * of struct @spi_transfer) * @gfp: gfp flags * * Returns: pointer to @spi_replaced_transfers, * PTR_ERR(...) in case of errors. */ static struct spi_replaced_transfers *spi_replace_transfers( struct spi_message *msg, struct spi_transfer *xfer_first, size_t remove, size_t insert, spi_replaced_release_t release, size_t extradatasize, gfp_t gfp) { struct spi_replaced_transfers *rxfer; struct spi_transfer *xfer; size_t i; /* Allocate the structure using spi_res */ rxfer = spi_res_alloc(msg->spi, __spi_replace_transfers_release, struct_size(rxfer, inserted_transfers, insert) + extradatasize, gfp); if (!rxfer) return ERR_PTR(-ENOMEM); /* The release code to invoke before running the generic release */ rxfer->release = release; /* Assign extradata */ if (extradatasize) rxfer->extradata = &rxfer->inserted_transfers[insert]; /* Init the replaced_transfers list */ INIT_LIST_HEAD(&rxfer->replaced_transfers); /* * Assign the list_entry after which we should reinsert * the @replaced_transfers - it may be spi_message.messages! */ rxfer->replaced_after = xfer_first->transfer_list.prev; /* Remove the requested number of transfers */ for (i = 0; i < remove; i++) { /* * If the entry after replaced_after it is msg->transfers * then we have been requested to remove more transfers * than are in the list. */ if (rxfer->replaced_after->next == &msg->transfers) { dev_err(&msg->spi->dev, "requested to remove more spi_transfers than are available\n"); /* Insert replaced transfers back into the message */ list_splice(&rxfer->replaced_transfers, rxfer->replaced_after); /* Free the spi_replace_transfer structure... */ spi_res_free(rxfer); /* ...and return with an error */ return ERR_PTR(-EINVAL); } /* * Remove the entry after replaced_after from list of * transfers and add it to list of replaced_transfers. */ list_move_tail(rxfer->replaced_after->next, &rxfer->replaced_transfers); } /* * Create copy of the given xfer with identical settings * based on the first transfer to get removed. */ for (i = 0; i < insert; i++) { /* We need to run in reverse order */ xfer = &rxfer->inserted_transfers[insert - 1 - i]; /* Copy all spi_transfer data */ memcpy(xfer, xfer_first, sizeof(*xfer)); /* Add to list */ list_add(&xfer->transfer_list, rxfer->replaced_after); /* Clear cs_change and delay for all but the last */ if (i) { xfer->cs_change = false; xfer->delay.value = 0; } } /* Set up inserted... */ rxfer->inserted = insert; /* ...and register it with spi_res/spi_message */ spi_res_add(msg, rxfer); return rxfer; } static int __spi_split_transfer_maxsize(struct spi_controller *ctlr, struct spi_message *msg, struct spi_transfer **xferp, size_t maxsize) { struct spi_transfer *xfer = *xferp, *xfers; struct spi_replaced_transfers *srt; size_t offset; size_t count, i; /* Calculate how many we have to replace */ count = DIV_ROUND_UP(xfer->len, maxsize); /* Create replacement */ srt = spi_replace_transfers(msg, xfer, 1, count, NULL, 0, GFP_KERNEL); if (IS_ERR(srt)) return PTR_ERR(srt); xfers = srt->inserted_transfers; /* * Now handle each of those newly inserted spi_transfers. * Note that the replacements spi_transfers all are preset * to the same values as *xferp, so tx_buf, rx_buf and len * are all identical (as well as most others) * so we just have to fix up len and the pointers. */ /* * The first transfer just needs the length modified, so we * run it outside the loop. */ xfers[0].len = min_t(size_t, maxsize, xfer[0].len); /* All the others need rx_buf/tx_buf also set */ for (i = 1, offset = maxsize; i < count; offset += maxsize, i++) { /* Update rx_buf, tx_buf and DMA */ if (xfers[i].rx_buf) xfers[i].rx_buf += offset; if (xfers[i].tx_buf) xfers[i].tx_buf += offset; /* Update length */ xfers[i].len = min(maxsize, xfers[i].len - offset); } /* * We set up xferp to the last entry we have inserted, * so that we skip those already split transfers. */ *xferp = &xfers[count - 1]; /* Increment statistics counters */ SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, transfers_split_maxsize); SPI_STATISTICS_INCREMENT_FIELD(msg->spi->pcpu_statistics, transfers_split_maxsize); return 0; } /** * spi_split_transfers_maxsize - split spi transfers into multiple transfers * when an individual transfer exceeds a * certain size * @ctlr: the @spi_controller for this transfer * @msg: the @spi_message to transform * @maxsize: the maximum when to apply this * * This function allocates resources that are automatically freed during the * spi message unoptimize phase so this function should only be called from * optimize_message callbacks. * * Return: status of transformation */ int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize) { struct spi_transfer *xfer; int ret; /* * Iterate over the transfer_list, * but note that xfer is advanced to the last transfer inserted * to avoid checking sizes again unnecessarily (also xfer does * potentially belong to a different list by the time the * replacement has happened). */ list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (xfer->len > maxsize) { ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer, maxsize); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize); /** * spi_split_transfers_maxwords - split SPI transfers into multiple transfers * when an individual transfer exceeds a * certain number of SPI words * @ctlr: the @spi_controller for this transfer * @msg: the @spi_message to transform * @maxwords: the number of words to limit each transfer to * * This function allocates resources that are automatically freed during the * spi message unoptimize phase so this function should only be called from * optimize_message callbacks. * * Return: status of transformation */ int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, size_t maxwords) { struct spi_transfer *xfer; /* * Iterate over the transfer_list, * but note that xfer is advanced to the last transfer inserted * to avoid checking sizes again unnecessarily (also xfer does * potentially belong to a different list by the time the * replacement has happened). */ list_for_each_entry(xfer, &msg->transfers, transfer_list) { size_t maxsize; int ret; maxsize = maxwords * roundup_pow_of_two(BITS_TO_BYTES(xfer->bits_per_word)); if (xfer->len > maxsize) { ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer, maxsize); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(spi_split_transfers_maxwords); /*-------------------------------------------------------------------------*/ /* * Core methods for SPI controller protocol drivers. Some of the * other core methods are currently defined as inline functions. */ static int __spi_validate_bits_per_word(struct spi_controller *ctlr, u8 bits_per_word) { if (ctlr->bits_per_word_mask) { /* Only 32 bits fit in the mask */ if (bits_per_word > 32) return -EINVAL; if (!(ctlr->bits_per_word_mask & SPI_BPW_MASK(bits_per_word))) return -EINVAL; } return 0; } /** * spi_set_cs_timing - configure CS setup, hold, and inactive delays * @spi: the device that requires specific CS timing configuration * * Return: zero on success, else a negative error code. */ static int spi_set_cs_timing(struct spi_device *spi) { struct device *parent = spi->controller->dev.parent; int status = 0; if (spi->controller->set_cs_timing && !spi_get_csgpiod(spi, 0)) { if (spi->controller->auto_runtime_pm) { status = pm_runtime_get_sync(parent); if (status < 0) { pm_runtime_put_noidle(parent); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); return status; } status = spi->controller->set_cs_timing(spi); pm_runtime_mark_last_busy(parent); pm_runtime_put_autosuspend(parent); } else { status = spi->controller->set_cs_timing(spi); } } return status; } /** * spi_setup - setup SPI mode and clock rate * @spi: the device whose settings are being modified * Context: can sleep, and no requests are queued to the device * * SPI protocol drivers may need to update the transfer mode if the * device doesn't work with its default. They may likewise need * to update clock rates or word sizes from initial values. This function * changes those settings, and must be called from a context that can sleep. * Except for SPI_CS_HIGH, which takes effect immediately, the changes take * effect the next time the device is selected and data is transferred to * or from it. When this function returns, the SPI device is deselected. * * Note that this call will fail if the protocol driver specifies an option * that the underlying controller or its driver does not support. For * example, not all hardware supports wire transfers using nine bit words, * LSB-first wire encoding, or active-high chipselects. * * Return: zero on success, else a negative error code. */ int spi_setup(struct spi_device *spi) { unsigned bad_bits, ugly_bits; int status; /* * Check mode to prevent that any two of DUAL, QUAD and NO_MOSI/MISO * are set at the same time. */ if ((hweight_long(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_NO_TX)) > 1) || (hweight_long(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD | SPI_NO_RX)) > 1)) { dev_err(&spi->dev, "setup: can not select any two of dual, quad and no-rx/tx at the same time\n"); return -EINVAL; } /* If it is SPI_3WIRE mode, DUAL and QUAD should be forbidden */ if ((spi->mode & SPI_3WIRE) && (spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL))) return -EINVAL; /* * Help drivers fail *cleanly* when they need options * that aren't supported with their current controller. * SPI_CS_WORD has a fallback software implementation, * so it is ignored here. */ bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD | SPI_NO_TX | SPI_NO_RX); ugly_bits = bad_bits & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL); if (ugly_bits) { dev_warn(&spi->dev, "setup: ignoring unsupported mode bits %x\n", ugly_bits); spi->mode &= ~ugly_bits; bad_bits &= ~ugly_bits; } if (bad_bits) { dev_err(&spi->dev, "setup: unsupported mode bits %x\n", bad_bits); return -EINVAL; } if (!spi->bits_per_word) { spi->bits_per_word = 8; } else { /* * Some controllers may not support the default 8 bits-per-word * so only perform the check when this is explicitly provided. */ status = __spi_validate_bits_per_word(spi->controller, spi->bits_per_word); if (status) return status; } if (spi->controller->max_speed_hz && (!spi->max_speed_hz || spi->max_speed_hz > spi->controller->max_speed_hz)) spi->max_speed_hz = spi->controller->max_speed_hz; mutex_lock(&spi->controller->io_mutex); if (spi->controller->setup) { status = spi->controller->setup(spi); if (status) { mutex_unlock(&spi->controller->io_mutex); dev_err(&spi->controller->dev, "Failed to setup device: %d\n", status); return status; } } status = spi_set_cs_timing(spi); if (status) { mutex_unlock(&spi->controller->io_mutex); return status; } if (spi->controller->auto_runtime_pm && spi->controller->set_cs) { status = pm_runtime_resume_and_get(spi->controller->dev.parent); if (status < 0) { mutex_unlock(&spi->controller->io_mutex); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); return status; } /* * We do not want to return positive value from pm_runtime_get, * there are many instances of devices calling spi_setup() and * checking for a non-zero return value instead of a negative * return value. */ status = 0; spi_set_cs(spi, false, true); pm_runtime_mark_last_busy(spi->controller->dev.parent); pm_runtime_put_autosuspend(spi->controller->dev.parent); } else { spi_set_cs(spi, false, true); } mutex_unlock(&spi->controller->io_mutex); if (spi->rt && !spi->controller->rt) { spi->controller->rt = true; spi_set_thread_rt(spi->controller); } trace_spi_setup(spi, status); dev_dbg(&spi->dev, "setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d\n", spi->mode & SPI_MODE_X_MASK, (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "", (spi->mode & SPI_LSB_FIRST) ? "lsb, " : "", (spi->mode & SPI_3WIRE) ? "3wire, " : "", (spi->mode & SPI_LOOP) ? "loopback, " : "", spi->bits_per_word, spi->max_speed_hz, status); return status; } EXPORT_SYMBOL_GPL(spi_setup); static int _spi_xfer_word_delay_update(struct spi_transfer *xfer, struct spi_device *spi) { int delay1, delay2; delay1 = spi_delay_to_ns(&xfer->word_delay, xfer); if (delay1 < 0) return delay1; delay2 = spi_delay_to_ns(&spi->word_delay, xfer); if (delay2 < 0) return delay2; if (delay1 < delay2) memcpy(&xfer->word_delay, &spi->word_delay, sizeof(xfer->word_delay)); return 0; } static int __spi_validate(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; struct spi_transfer *xfer; int w_size; if (list_empty(&message->transfers)) return -EINVAL; message->spi = spi; /* * Half-duplex links include original MicroWire, and ones with * only one data pin like SPI_3WIRE (switches direction) or where * either MOSI or MISO is missing. They can also be caused by * software limitations. */ if ((ctlr->flags & SPI_CONTROLLER_HALF_DUPLEX) || (spi->mode & SPI_3WIRE)) { unsigned flags = ctlr->flags; list_for_each_entry(xfer, &message->transfers, transfer_list) { if (xfer->rx_buf && xfer->tx_buf) return -EINVAL; if ((flags & SPI_CONTROLLER_NO_TX) && xfer->tx_buf) return -EINVAL; if ((flags & SPI_CONTROLLER_NO_RX) && xfer->rx_buf) return -EINVAL; } } /* * Set transfer bits_per_word and max speed as spi device default if * it is not set for this transfer. * Set transfer tx_nbits and rx_nbits as single transfer default * (SPI_NBITS_SINGLE) if it is not set for this transfer. * Ensure transfer word_delay is at least as long as that required by * device itself. */ message->frame_length = 0; list_for_each_entry(xfer, &message->transfers, transfer_list) { xfer->effective_speed_hz = 0; message->frame_length += xfer->len; if (!xfer->bits_per_word) xfer->bits_per_word = spi->bits_per_word; if (!xfer->speed_hz) xfer->speed_hz = spi->max_speed_hz; if (ctlr->max_speed_hz && xfer->speed_hz > ctlr->max_speed_hz) xfer->speed_hz = ctlr->max_speed_hz; if (__spi_validate_bits_per_word(ctlr, xfer->bits_per_word)) return -EINVAL; /* * SPI transfer length should be multiple of SPI word size * where SPI word size should be power-of-two multiple. */ if (xfer->bits_per_word <= 8) w_size = 1; else if (xfer->bits_per_word <= 16) w_size = 2; else w_size = 4; /* No partial transfers accepted */ if (xfer->len % w_size) return -EINVAL; if (xfer->speed_hz && ctlr->min_speed_hz && xfer->speed_hz < ctlr->min_speed_hz) return -EINVAL; if (xfer->tx_buf && !xfer->tx_nbits) xfer->tx_nbits = SPI_NBITS_SINGLE; if (xfer->rx_buf && !xfer->rx_nbits) xfer->rx_nbits = SPI_NBITS_SINGLE; /* * Check transfer tx/rx_nbits: * 1. check the value matches one of single, dual and quad * 2. check tx/rx_nbits match the mode in spi_device */ if (xfer->tx_buf) { if (spi->mode & SPI_NO_TX) return -EINVAL; if (xfer->tx_nbits != SPI_NBITS_SINGLE && xfer->tx_nbits != SPI_NBITS_DUAL && xfer->tx_nbits != SPI_NBITS_QUAD && xfer->tx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_QUAD) && !(spi->mode & SPI_TX_QUAD)) return -EINVAL; } /* Check transfer rx_nbits */ if (xfer->rx_buf) { if (spi->mode & SPI_NO_RX) return -EINVAL; if (xfer->rx_nbits != SPI_NBITS_SINGLE && xfer->rx_nbits != SPI_NBITS_DUAL && xfer->rx_nbits != SPI_NBITS_QUAD && xfer->rx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_QUAD) && !(spi->mode & SPI_RX_QUAD)) return -EINVAL; } if (_spi_xfer_word_delay_update(xfer, spi)) return -EINVAL; } message->status = -EINPROGRESS; return 0; } /* * spi_split_transfers - generic handling of transfer splitting * @msg: the message to split * * Under certain conditions, a SPI controller may not support arbitrary * transfer sizes or other features required by a peripheral. This function * will split the transfers in the message into smaller transfers that are * supported by the controller. * * Controllers with special requirements not covered here can also split * transfers in the optimize_message() callback. * * Context: can sleep * Return: zero on success, else a negative error code */ static int spi_split_transfers(struct spi_message *msg) { struct spi_controller *ctlr = msg->spi->controller; struct spi_transfer *xfer; int ret; /* * If an SPI controller does not support toggling the CS line on each * transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO * for the CS line, we can emulate the CS-per-word hardware function by * splitting transfers into one-word transfers and ensuring that * cs_change is set for each transfer. */ if ((msg->spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) || spi_is_csgpiod(msg->spi))) { ret = spi_split_transfers_maxwords(ctlr, msg, 1); if (ret) return ret; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* Don't change cs_change on the last entry in the list */ if (list_is_last(&xfer->transfer_list, &msg->transfers)) break; xfer->cs_change = 1; } } else { ret = spi_split_transfers_maxsize(ctlr, msg, spi_max_transfer_size(msg->spi)); if (ret) return ret; } return 0; } /* * __spi_optimize_message - shared implementation for spi_optimize_message() * and spi_maybe_optimize_message() * @spi: the device that will be used for the message * @msg: the message to optimize * * Peripheral drivers will call spi_optimize_message() and the spi core will * call spi_maybe_optimize_message() instead of calling this directly. * * It is not valid to call this on a message that has already been optimized. * * Return: zero on success, else a negative error code */ static int __spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { struct spi_controller *ctlr = spi->controller; int ret; ret = __spi_validate(spi, msg); if (ret) return ret; ret = spi_split_transfers(msg); if (ret) return ret; if (ctlr->optimize_message) { ret = ctlr->optimize_message(msg); if (ret) { spi_res_release(ctlr, msg); return ret; } } msg->optimized = true; return 0; } /* * spi_maybe_optimize_message - optimize message if it isn't already pre-optimized * @spi: the device that will be used for the message * @msg: the message to optimize * Return: zero on success, else a negative error code */ static int spi_maybe_optimize_message(struct spi_device *spi, struct spi_message *msg) { if (spi->controller->defer_optimize_message) { msg->spi = spi; return 0; } if (msg->pre_optimized) return 0; return __spi_optimize_message(spi, msg); } /** * spi_optimize_message - do any one-time validation and setup for a SPI message * @spi: the device that will be used for the message * @msg: the message to optimize * * Peripheral drivers that reuse the same message repeatedly may call this to * perform as much message prep as possible once, rather than repeating it each * time a message transfer is performed to improve throughput and reduce CPU * usage. * * Once a message has been optimized, it cannot be modified with the exception * of updating the contents of any xfer->tx_buf (the pointer can't be changed, * only the data in the memory it points to). * * Calls to this function must be balanced with calls to spi_unoptimize_message() * to avoid leaking resources. * * Context: can sleep * Return: zero on success, else a negative error code */ int spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { int ret; /* * Pre-optimization is not supported and optimization is deferred e.g. * when using spi-mux. */ if (spi->controller->defer_optimize_message) return 0; ret = __spi_optimize_message(spi, msg); if (ret) return ret; /* * This flag indicates that the peripheral driver called spi_optimize_message() * and therefore we shouldn't unoptimize message automatically when finalizing * the message but rather wait until spi_unoptimize_message() is called * by the peripheral driver. */ msg->pre_optimized = true; return 0; } EXPORT_SYMBOL_GPL(spi_optimize_message); /** * spi_unoptimize_message - releases any resources allocated by spi_optimize_message() * @msg: the message to unoptimize * * Calls to this function must be balanced with calls to spi_optimize_message(). * * Context: can sleep */ void spi_unoptimize_message(struct spi_message *msg) { if (msg->spi->controller->defer_optimize_message) return; __spi_unoptimize_message(msg); msg->pre_optimized = false; } EXPORT_SYMBOL_GPL(spi_unoptimize_message); static int __spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; struct spi_transfer *xfer; /* * Some controllers do not support doing regular SPI transfers. Return * ENOTSUPP when this is the case. */ if (!ctlr->transfer) return -ENOTSUPP; SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_async); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_async); trace_spi_message_submit(message); if (!ctlr->ptp_sts_supported) { list_for_each_entry(xfer, &message->transfers, transfer_list) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } } return ctlr->transfer(spi, message); } static void devm_spi_unoptimize_message(void *msg) { spi_unoptimize_message(msg); } /** * devm_spi_optimize_message - managed version of spi_optimize_message() * @dev: the device that manages @msg (usually @spi->dev) * @spi: the device that will be used for the message * @msg: the message to optimize * Return: zero on success, else a negative error code * * spi_unoptimize_message() will automatically be called when the device is * removed. */ int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, struct spi_message *msg) { int ret; ret = spi_optimize_message(spi, msg); if (ret) return ret; return devm_add_action_or_reset(dev, devm_spi_unoptimize_message, msg); } EXPORT_SYMBOL_GPL(devm_spi_optimize_message); /** * spi_async - asynchronous SPI transfer * @spi: device with which data will be exchanged * @message: describes the data transfers, including completion callback * Context: any (IRQs may be blocked, etc) * * This call may be used in_irq and other contexts which can't sleep, * as well as from task contexts which can sleep. * * The completion callback is invoked in a context which can't sleep. * Before that invocation, the value of message->status is undefined. * When the callback is issued, message->status holds either zero (to * indicate complete success) or a negative error code. After that * callback returns, the driver which issued the transfer request may * deallocate the associated memory; it's no longer in use by any SPI * core or controller driver code. * * Note that although all messages to a spi_device are handled in * FIFO order, messages may go to different devices in other orders. * Some device might be higher priority, or have various "hard" access * time requirements, for example. * * On detection of any fault during the transfer, processing of * the entire message is aborted, and the device is deselected. * Until returning from the associated message completion callback, * no other spi_message queued to that device will be processed. * (This rule applies equally to all the synchronous transfer calls, * which are wrappers around this core asynchronous primitive.) * * Return: zero on success, else a negative error code. */ int spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; int ret; unsigned long flags; ret = spi_maybe_optimize_message(spi, message); if (ret) return ret; spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); if (ctlr->bus_lock_flag) ret = -EBUSY; else ret = __spi_async(spi, message); spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); return ret; } EXPORT_SYMBOL_GPL(spi_async); static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct spi_message *msg) { bool was_busy; int ret; mutex_lock(&ctlr->io_mutex); was_busy = ctlr->busy; ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; if (!was_busy) { kfree(ctlr->dummy_rx); ctlr->dummy_rx = NULL; kfree(ctlr->dummy_tx); ctlr->dummy_tx = NULL; if (ctlr->unprepare_transfer_hardware && ctlr->unprepare_transfer_hardware(ctlr)) dev_err(&ctlr->dev, "failed to unprepare transfer hardware\n"); spi_idle_runtime_pm(ctlr); } mutex_unlock(&ctlr->io_mutex); } /*-------------------------------------------------------------------------*/ /* * Utility methods for SPI protocol drivers, layered on * top of the core. Some other utility methods are defined as * inline functions. */ static void spi_complete(void *arg) { complete(arg); } static int __spi_sync(struct spi_device *spi, struct spi_message *message) { DECLARE_COMPLETION_ONSTACK(done); unsigned long flags; int status; struct spi_controller *ctlr = spi->controller; if (__spi_check_suspended(ctlr)) { dev_warn_once(&spi->dev, "Attempted to sync while suspend\n"); return -ESHUTDOWN; } status = spi_maybe_optimize_message(spi, message); if (status) return status; SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync); /* * Checking queue_empty here only guarantees async/sync message * ordering when coming from the same context. It does not need to * guard against reentrancy from a different context. The io_mutex * will catch those cases. */ if (READ_ONCE(ctlr->queue_empty) && !ctlr->must_async) { message->actual_length = 0; message->status = -EINPROGRESS; trace_spi_message_submit(message); SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync_immediate); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync_immediate); __spi_transfer_message_noqueue(ctlr, message); return message->status; } /* * There are messages in the async queue that could have originated * from the same context, so we need to preserve ordering. * Therefor we send the message to the async queue and wait until they * are completed. */ message->complete = spi_complete; message->context = &done; spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); status = __spi_async(spi, message); spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); if (status == 0) { wait_for_completion(&done); status = message->status; } message->complete = NULL; message->context = NULL; return status; } /** * spi_sync - blocking/synchronous SPI data transfers * @spi: device with which data will be exchanged * @message: describes the data transfers * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. Low-overhead controller * drivers may DMA directly into and out of the message buffers. * * Note that the SPI device's chip select is active during the message, * and then is normally disabled between messages. Drivers for some * frequently-used devices may want to minimize costs of selecting a chip, * by leaving it selected in anticipation that the next message will go * to the same chip. (That may increase power usage.) * * Also, the caller is guaranteeing that the memory associated with the * message will not be freed before this call returns. * * Return: zero on success, else a negative error code. */ int spi_sync(struct spi_device *spi, struct spi_message *message) { int ret; mutex_lock(&spi->controller->bus_lock_mutex); ret = __spi_sync(spi, message); mutex_unlock(&spi->controller->bus_lock_mutex); return ret; } EXPORT_SYMBOL_GPL(spi_sync); /** * spi_sync_locked - version of spi_sync with exclusive bus usage * @spi: device with which data will be exchanged * @message: describes the data transfers * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. Low-overhead controller * drivers may DMA directly into and out of the message buffers. * * This call should be used by drivers that require exclusive access to the * SPI bus. It has to be preceded by a spi_bus_lock call. The SPI bus must * be released by a spi_bus_unlock call when the exclusive access is over. * * Return: zero on success, else a negative error code. */ int spi_sync_locked(struct spi_device *spi, struct spi_message *message) { return __spi_sync(spi, message); } EXPORT_SYMBOL_GPL(spi_sync_locked); /** * spi_bus_lock - obtain a lock for exclusive SPI bus usage * @ctlr: SPI bus master that should be locked for exclusive bus access * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. * * This call should be used by drivers that require exclusive access to the * SPI bus. The SPI bus must be released by a spi_bus_unlock call when the * exclusive access is over. Data transfer must be done by spi_sync_locked * and spi_async_locked calls when the SPI bus lock is held. * * Return: always zero. */ int spi_bus_lock(struct spi_controller *ctlr) { unsigned long flags; mutex_lock(&ctlr->bus_lock_mutex); spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); ctlr->bus_lock_flag = 1; spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); /* Mutex remains locked until spi_bus_unlock() is called */ return 0; } EXPORT_SYMBOL_GPL(spi_bus_lock); /** * spi_bus_unlock - release the lock for exclusive SPI bus usage * @ctlr: SPI bus master that was locked for exclusive bus access * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. * * This call releases an SPI bus lock previously obtained by an spi_bus_lock * call. * * Return: always zero. */ int spi_bus_unlock(struct spi_controller *ctlr) { ctlr->bus_lock_flag = 0; mutex_unlock(&ctlr->bus_lock_mutex); return 0; } EXPORT_SYMBOL_GPL(spi_bus_unlock); /* Portable code must never pass more than 32 bytes */ #define SPI_BUFSIZ max(32, SMP_CACHE_BYTES) static u8 *buf; /** * spi_write_then_read - SPI synchronous write followed by read * @spi: device with which data will be exchanged * @txbuf: data to be written (need not be DMA-safe) * @n_tx: size of txbuf, in bytes * @rxbuf: buffer into which data will be read (need not be DMA-safe) * @n_rx: size of rxbuf, in bytes * Context: can sleep * * This performs a half duplex MicroWire style transaction with the * device, sending txbuf and then reading rxbuf. The return value * is zero for success, else a negative errno status code. * This call may only be used from a context that may sleep. * * Parameters to this routine are always copied using a small buffer. * Performance-sensitive or bulk transfer code should instead use * spi_{async,sync}() calls with DMA-safe buffers. * * Return: zero on success, else a negative error code. */ int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx) { static DEFINE_MUTEX(lock); int status; struct spi_message message; struct spi_transfer x[2]; u8 *local_buf; /* * Use preallocated DMA-safe buffer if we can. We can't avoid * copying here, (as a pure convenience thing), but we can * keep heap costs out of the hot path unless someone else is * using the pre-allocated buffer or the transfer is too large. */ if ((n_tx + n_rx) > SPI_BUFSIZ || !mutex_trylock(&lock)) { local_buf = kmalloc(max((unsigned)SPI_BUFSIZ, n_tx + n_rx), GFP_KERNEL | GFP_DMA); if (!local_buf) return -ENOMEM; } else { local_buf = buf; } spi_message_init(&message); memset(x, 0, sizeof(x)); if (n_tx) { x[0].len = n_tx; spi_message_add_tail(&x[0], &message); } if (n_rx) { x[1].len = n_rx; spi_message_add_tail(&x[1], &message); } memcpy(local_buf, txbuf, n_tx); x[0].tx_buf = local_buf; x[1].rx_buf = local_buf + n_tx; /* Do the I/O */ status = spi_sync(spi, &message); if (status == 0) memcpy(rxbuf, x[1].rx_buf, n_rx); if (x[0].tx_buf == buf) mutex_unlock(&lock); else kfree(local_buf); return status; } EXPORT_SYMBOL_GPL(spi_write_then_read); /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_OF_DYNAMIC) /* Must call put_device() when done with returned spi_device device */ static struct spi_device *of_find_spi_device_by_node(struct device_node *node) { struct device *dev = bus_find_device_by_of_node(&spi_bus_type, node); return dev ? to_spi_device(dev) : NULL; } /* The spi controllers are not using spi_bus, so we find it with another way */ static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node) { struct device *dev; dev = class_find_device_by_of_node(&spi_master_class, node); if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE)) dev = class_find_device_by_of_node(&spi_slave_class, node); if (!dev) return NULL; /* Reference got in class_find_device */ return container_of(dev, struct spi_controller, dev); } static int of_spi_notify(struct notifier_block *nb, unsigned long action, void *arg) { struct of_reconfig_data *rd = arg; struct spi_controller *ctlr; struct spi_device *spi; switch (of_reconfig_get_state_change(action, arg)) { case OF_RECONFIG_CHANGE_ADD: ctlr = of_find_spi_controller_by_node(rd->dn->parent); if (ctlr == NULL) return NOTIFY_OK; /* Not for us */ if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { put_device(&ctlr->dev); return NOTIFY_OK; } /* * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; spi = of_register_spi_device(ctlr, rd->dn); put_device(&ctlr->dev); if (IS_ERR(spi)) { pr_err("%s: failed to create for '%pOF'\n", __func__, rd->dn); of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(spi)); } break; case OF_RECONFIG_CHANGE_REMOVE: /* Already depopulated? */ if (!of_node_check_flag(rd->dn, OF_POPULATED)) return NOTIFY_OK; /* Find our device by node */ spi = of_find_spi_device_by_node(rd->dn); if (spi == NULL) return NOTIFY_OK; /* No? not meant for us */ /* Unregister takes one ref away */ spi_unregister_device(spi); /* And put the reference of the find */ put_device(&spi->dev); break; } return NOTIFY_OK; } static struct notifier_block spi_of_notifier = { .notifier_call = of_spi_notify, }; #else /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ extern struct notifier_block spi_of_notifier; #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ #if IS_ENABLED(CONFIG_ACPI) static int spi_acpi_controller_match(struct device *dev, const void *data) { return ACPI_COMPANION(dev->parent) == data; } struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) { struct device *dev; dev = class_find_device(&spi_master_class, NULL, adev, spi_acpi_controller_match); if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE)) dev = class_find_device(&spi_slave_class, NULL, adev, spi_acpi_controller_match); if (!dev) return NULL; return container_of(dev, struct spi_controller, dev); } EXPORT_SYMBOL_GPL(acpi_spi_find_controller_by_adev); static struct spi_device *acpi_spi_find_device_by_adev(struct acpi_device *adev) { struct device *dev; dev = bus_find_device_by_acpi_dev(&spi_bus_type, adev); return to_spi_device(dev); } static int acpi_spi_notify(struct notifier_block *nb, unsigned long value, void *arg) { struct acpi_device *adev = arg; struct spi_controller *ctlr; struct spi_device *spi; switch (value) { case ACPI_RECONFIG_DEVICE_ADD: ctlr = acpi_spi_find_controller_by_adev(acpi_dev_parent(adev)); if (!ctlr) break; acpi_register_spi_device(ctlr, adev); put_device(&ctlr->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) break; spi = acpi_spi_find_device_by_adev(adev); if (!spi) break; spi_unregister_device(spi); put_device(&spi->dev); break; } return NOTIFY_OK; } static struct notifier_block spi_acpi_notifier = { .notifier_call = acpi_spi_notify, }; #else extern struct notifier_block spi_acpi_notifier; #endif static int __init spi_init(void) { int status; buf = kmalloc(SPI_BUFSIZ, GFP_KERNEL); if (!buf) { status = -ENOMEM; goto err0; } status = bus_register(&spi_bus_type); if (status < 0) goto err1; status = class_register(&spi_master_class); if (status < 0) goto err2; if (IS_ENABLED(CONFIG_SPI_SLAVE)) { status = class_register(&spi_slave_class); if (status < 0) goto err3; } if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_register(&spi_of_notifier)); if (IS_ENABLED(CONFIG_ACPI)) WARN_ON(acpi_reconfig_notifier_register(&spi_acpi_notifier)); return 0; err3: class_unregister(&spi_master_class); err2: bus_unregister(&spi_bus_type); err1: kfree(buf); buf = NULL; err0: return status; } /* * A board_info is normally registered in arch_initcall(), * but even essential drivers wait till later. * * REVISIT only boardinfo really needs static linking. The rest (device and * driver registration) _could_ be dynamically linked (modular) ... Costs * include needing to have boardinfo data structures be much more public. */ postcore_initcall(spi_init);
1163 1907 1896 456 435 287 3 332 114 103 223 1864 578 332 615 49 528 498 635 471 341 669 636 143 143 103 255 274 277 277 161 31 160 34 218 170 208 20 80 264 18 256 1126 1087 1088 25 1966 1962 1086 1089 1685 1682 1163 1163 6 2 6 86 86 142 34 110 196 5 192 161 161 187 4 184 474 474 1680 4 1 4 797 797 796 1 1 582 583 579 1 196 5 192 161 161 187 4 184 474 474 1685 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/core/dev_addr_lists.c - Functions for handling net device lists * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> * * This file contains functions for working with unicast, multicast and device * addresses lists. */ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> #include "dev.h" /* * General list handling functions */ static int __hw_addr_insert(struct netdev_hw_addr_list *list, struct netdev_hw_addr *new, int addr_len) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(new->addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&new->type, &ha->type, sizeof(new->type)); parent = *ins_point; if (diff < 0) ins_point = &parent->rb_left; else if (diff > 0) ins_point = &parent->rb_right; else return -EEXIST; } rb_link_node_rcu(&new->node, parent, ins_point); rb_insert_color(&new->node, &list->tree); return 0; } static struct netdev_hw_addr* __hw_addr_create(const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; int alloc_size; alloc_size = sizeof(*ha); if (alloc_size < L1_CACHE_BYTES) alloc_size = L1_CACHE_BYTES; ha = kmalloc(alloc_size, GFP_ATOMIC); if (!ha) return NULL; memcpy(ha->addr, addr, addr_len); ha->type = addr_type; ha->refcount = 1; ha->global_use = global; ha->synced = sync ? 1 : 0; ha->sync_cnt = 0; return ha; } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync, int sync_count, bool exclusive) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; if (addr_len > MAX_ADDR_LEN) return -EINVAL; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); parent = *ins_point; if (diff < 0) { ins_point = &parent->rb_left; } else if (diff > 0) { ins_point = &parent->rb_right; } else { if (exclusive) return -EEXIST; if (global) { /* check if addr is already used as global */ if (ha->global_use) return 0; else ha->global_use = true; } if (sync) { if (ha->synced && sync_count) return -EEXIST; else ha->synced++; } ha->refcount++; return 0; } } ha = __hw_addr_create(addr, addr_len, addr_type, global, sync); if (!ha) return -ENOMEM; rb_link_node(&ha->node, parent, ins_point); rb_insert_color(&ha->node, &list->tree); list_add_tail_rcu(&ha->list, &list->list); list->count++; return 0; } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, 0, false); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, struct netdev_hw_addr *ha, bool global, bool sync) { if (global && !ha->global_use) return -ENOENT; if (sync && !ha->synced) return -ENOENT; if (global) ha->global_use = false; if (sync) ha->synced--; if (--ha->refcount) return 0; rb_erase(&ha->node, &list->tree); list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); list->count--; return 0; } static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { struct rb_node *node; node = list->tree.rb_node; while (node) { struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node); int diff = memcmp(addr, ha->addr, addr_len); if (diff == 0 && addr_type) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); if (diff < 0) node = node->rb_left; else if (diff > 0) node = node->rb_right; else return ha; } return NULL; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type); if (!ha) return -ENOENT; return __hw_addr_del_entry(list, ha, global, sync); } static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); } static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true, ha->sync_cnt, false); if (err && err != -EEXIST) return err; if (!err) { ha->sync_cnt++; ha->refcount++; } return 0; } static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, false, true); if (err) return; ha->sync_cnt--; /* address on from list is not marked synced */ __hw_addr_del_entry(from_list, ha, false, false); } static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt == ha->refcount) { __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } else { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } } return err; } /* This function only works where there is a strict 1-1 relationship * between source and destination of they synch. If you ever need to * sync addresses to more then 1 destination, you need to use * __hw_addr_sync_multiple(). */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->sync_cnt) { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } else if (ha->refcount == 1) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } EXPORT_SYMBOL(__hw_addr_sync); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); /** * __hw_addr_sync_dev - Synchronize device's multicast list * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without * any notification to the device. **/ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; int err; /* first go through and flush out any stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt || ha->refcount != 1) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (ha->sync_cnt) continue; err = sync(dev, ha->addr); if (err) return err; ha->sync_cnt++; ha->refcount++; } return 0; } EXPORT_SYMBOL(__hw_addr_sync_dev); /** * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking * into account references * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address or reference on it should be added * @unsync: function to call if address or some reference on it should removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address or references on it * add/remove notifications. The unsync function may be NULL in which case * the addresses or references on it requiring removal will simply be * removed without any notification to the device. That is responsibility of * the driver to identify and distribute address or references on it between * internal address tables. **/ int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; int err, ref_cnt; /* first go through and flush out any unsynced/stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address is not used */ if ((ha->sync_cnt << 1) <= ha->refcount) continue; /* if fails defer unsyncing address */ ref_cnt = ha->refcount - ha->sync_cnt; if (unsync && unsync(dev, ha->addr, ref_cnt)) continue; ha->refcount = (ref_cnt << 1) + 1; ha->sync_cnt = ref_cnt; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync updated/new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address added or reused */ if ((ha->sync_cnt << 1) >= ha->refcount) continue; ref_cnt = ha->refcount - ha->sync_cnt; err = sync(dev, ha->addr, ref_cnt); if (err) return err; ha->refcount = ref_cnt << 1; ha->sync_cnt = ref_cnt; } return 0; } EXPORT_SYMBOL(__hw_addr_ref_sync_dev); /** * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on * it from device * @list: address list to remove synchronized addresses (references on it) from * @dev: device to sync * @unsync: function to call if address and references on it should be removed * * Remove all addresses that were added to the device by * __hw_addr_ref_sync_dev(). This function is intended to be called from the * ndo_stop or ndo_open functions on devices that require explicit address (or * references on it) add/remove notifications. If the unsync function pointer * is NULL then this function can be used to just reset the sync_cnt for the * addresses in the list. **/ void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr, ha->sync_cnt)) continue; ha->refcount -= ha->sync_cnt - 1; ha->sync_cnt = 0; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_ref_unsync_dev); /** * __hw_addr_unsync_dev - Remove synchronized addresses from device * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by __hw_addr_sync_dev(). * This function is intended to be called from the ndo_stop or ndo_open * functions on devices that require explicit address add/remove * notifications. If the unsync function pointer is NULL then this function * can be used to just reset the sync_cnt for the addresses in the list. **/ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_unsync_dev); static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; list->tree = RB_ROOT; list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); } list->count = 0; } void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; list->tree = RB_ROOT; } EXPORT_SYMBOL(__hw_addr_init); /* * Device addresses handling functions */ /* Check that netdev->dev_addr is not written to directly as this would * break the rbtree layout. All changes should go thru dev_addr_set() and co. * Remove this check in mid-2024. */ void dev_addr_check(struct net_device *dev) { if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN)) return; netdev_warn(dev, "Current addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr); netdev_warn(dev, "Expected addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr_shadow); netdev_WARN(dev, "Incorrect netdev->dev_addr\n"); } /** * dev_addr_flush - Flush device address list * @dev: device * * Flush device address list and reset ->dev_addr. * * The caller must hold the rtnl_mutex. */ void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ dev_addr_check(dev); __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } /** * dev_addr_init - Init device address list * @dev: device * * Init device address list and create the first element, * used by ->dev_addr. * * The caller must hold the rtnl_mutex. */ int dev_addr_init(struct net_device *dev) { unsigned char addr[MAX_ADDR_LEN]; struct netdev_hw_addr *ha; int err; /* rtnl_mutex must be held here */ __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } return err; } void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len) { struct netdev_hw_addr *ha; dev_addr_check(dev); ha = container_of(dev->dev_addr, struct netdev_hw_addr, addr[0]); rb_erase(&ha->node, &dev->dev_addrs.tree); memcpy(&ha->addr[offset], addr, len); memcpy(&dev->dev_addr_shadow[offset], addr, len); WARN_ON(__hw_addr_insert(&dev->dev_addrs, ha, dev->addr_len)); } EXPORT_SYMBOL(dev_addr_mod); /** * dev_addr_add - Add a device address * @dev: device * @addr: address to add * @addr_type: address type * * Add a device address to the device or increase the reference count if * it already exists. * * The caller must hold the rtnl_mutex. */ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; ASSERT_RTNL(); err = dev_pre_changeaddr_notify(dev, addr, NULL); if (err) return err; err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_add); /** * dev_addr_del - Release a device address. * @dev: device * @addr: address to delete * @addr_type: address type * * Release reference to a device address and remove it from the device * if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; struct netdev_hw_addr *ha; ASSERT_RTNL(); /* * We can not remove the first address from the list because * dev->dev_addr points to that. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); if (!memcmp(ha->addr, addr, dev->addr_len) && ha->type == addr_type && ha->refcount == 1) return -ENOENT; err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_del); /* * Unicast list handling functions */ /** * dev_uc_add_excl - Add a global secondary unicast address * @dev: device * @addr: address to add */ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add_excl); /** * dev_uc_add - Add a secondary unicast address * @dev: device * @addr: address to add * * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add); /** * dev_uc_del - Release secondary unicast address. * @dev: device * @addr: address to delete * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_del); /** * dev_uc_sync - Synchronize device's unicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. This function assumes that * addresses will only ever be synced to the @to devices and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync); /** * dev_uc_sync_multiple - Synchronize device's unicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have been deleted from the source. The source device * must be locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. It allows for a single source * device to be synced to multiple destination devices. */ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync_multiple); /** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_uc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_uc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two * reasons: * 1) This is always called without any addr_list_lock, so as the * outermost one here, it must be 0. * 2) This is called by some callers after unlinking the upper device, * so the dev->lower_level becomes 1 again. * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or * larger. */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_uc_unsync); /** * dev_uc_flush - Flush unicast addresses * @dev: device * * Flush unicast addresses. */ void dev_uc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_uc_flush); /** * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. */ void dev_uc_init(struct net_device *dev) { __hw_addr_init(&dev->uc); } EXPORT_SYMBOL(dev_uc_init); /* * Multicast list handling functions */ /** * dev_mc_add_excl - Add a global secondary multicast address * @dev: device * @addr: address to add */ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_mc_add_excl); static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false, 0, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_add - Add a multicast address * @dev: device * @addr: address to add * * Add a multicast address to the device or increase * the reference count if it already exists. */ int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } EXPORT_SYMBOL(dev_mc_add); /** * dev_mc_add_global - Add a global multicast address * @dev: device * @addr: address to add * * Add a global multicast address to the device. */ int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_del - Delete a multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } EXPORT_SYMBOL(dev_mc_del); /** * dev_mc_del_global - Delete a global multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } EXPORT_SYMBOL(dev_mc_del_global); /** * dev_mc_sync - Synchronize device's multicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync); /** * dev_mc_sync_multiple - Synchronize device's multicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. It allows for a single * source device to be synced to multiple destination devices. */ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync_multiple); /** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_mc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); /** * dev_mc_flush - Flush multicast addresses * @dev: device * * Flush multicast addresses. */ void dev_mc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->mc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_mc_flush); /** * dev_mc_init - Init multicast address list * @dev: device * * Init multicast address list. */ void dev_mc_init(struct net_device *dev) { __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init);
3 5 5 2 5 12 3 1 7 2 6 1 2 4 1 1 1 1 2 2 2 1 10 1 2 2 1 3 2 1 4 1 1 2 5 5 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 // SPDX-License-Identifier: GPL-2.0 /* XSKMAP used for AF_XDP sockets * Copyright(c) 2018 Intel Corporation. */ #include <linux/bpf.h> #include <linux/filter.h> #include <net/xdp_sock.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/btf_ids.h> #include "xsk.h" static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, struct xdp_sock __rcu **map_entry) { struct xsk_map_node *node; node = bpf_map_kzalloc(&map->map, sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); if (!node) return ERR_PTR(-ENOMEM); bpf_map_inc(&map->map); atomic_inc(&map->count); node->map = map; node->map_entry = map_entry; return node; } static void xsk_map_node_free(struct xsk_map_node *node) { struct xsk_map *map = node->map; bpf_map_put(&node->map->map); kfree(node); atomic_dec(&map->count); } static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) { spin_lock_bh(&xs->map_list_lock); list_add_tail(&node->node, &xs->map_list); spin_unlock_bh(&xs->map_list_lock); } static void xsk_map_sock_delete(struct xdp_sock *xs, struct xdp_sock __rcu **map_entry) { struct xsk_map_node *n, *tmp; spin_lock_bh(&xs->map_list_lock); list_for_each_entry_safe(n, tmp, &xs->map_list, node) { if (map_entry == n->map_entry) { list_del(&n->node); xsk_map_node_free(n); } } spin_unlock_bh(&xs->map_list_lock); } static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) { struct xsk_map *m; int numa_node; u64 size; if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) return ERR_PTR(-EINVAL); numa_node = bpf_map_attr_numa_node(attr); size = struct_size(m, xsk_map, attr->max_entries); m = bpf_map_area_alloc(size, numa_node); if (!m) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&m->map, attr); spin_lock_init(&m->lock); return &m->map; } static u64 xsk_map_mem_usage(const struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); return struct_size(m, xsk_map, map->max_entries) + (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node); } static void xsk_map_free(struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); synchronize_net(); bpf_map_area_free(m); } static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct xsk_map *m = container_of(map, struct xsk_map, map); u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = next_key; if (index >= m->map.max_entries) { *next = 0; return 0; } if (index == m->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; struct bpf_insn *insn = insn_buf; *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *))); *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map)); *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. */ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) { struct xsk_map *m = container_of(map, struct xsk_map, map); if (key >= map->max_entries) return NULL; return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held()); } static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) { return __xsk_map_lookup_elem(map, *(u32 *)key); } static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; struct xdp_sock *xs, *old_xs; u32 i = *(u32 *)key, fd = *(u32 *)value; struct xsk_map_node *node; struct socket *sock; int err; if (unlikely(map_flags > BPF_EXIST)) return -EINVAL; if (unlikely(i >= m->map.max_entries)) return -E2BIG; sock = sockfd_lookup(fd, &err); if (!sock) return err; if (sock->sk->sk_family != PF_XDP) { sockfd_put(sock); return -EOPNOTSUPP; } xs = (struct xdp_sock *)sock->sk; map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry); if (IS_ERR(node)) { sockfd_put(sock); return PTR_ERR(node); } spin_lock_bh(&m->lock); old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock)); if (old_xs == xs) { err = 0; goto out; } else if (old_xs && map_flags == BPF_NOEXIST) { err = -EEXIST; goto out; } else if (!old_xs && map_flags == BPF_EXIST) { err = -ENOENT; goto out; } xsk_map_sock_add(xs, node); rcu_assign_pointer(*map_entry, xs); if (old_xs) xsk_map_sock_delete(old_xs, map_entry); spin_unlock_bh(&m->lock); sockfd_put(sock); return 0; out: spin_unlock_bh(&m->lock); sockfd_put(sock); xsk_map_node_free(node); return err; } static long xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; struct xdp_sock *old_xs; int k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; spin_lock_bh(&m->lock); map_entry = &m->xsk_map[k]; old_xs = unrcu_pointer(xchg(map_entry, NULL)); if (old_xs) xsk_map_sock_delete(old_xs, map_entry); spin_unlock_bh(&m->lock); return 0; } static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __xsk_map_lookup_elem); } void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, struct xdp_sock __rcu **map_entry) { spin_lock_bh(&map->lock); if (rcu_access_pointer(*map_entry) == xs) { rcu_assign_pointer(*map_entry, NULL); xsk_map_sock_delete(xs, map_entry); } spin_unlock_bh(&map->lock); } static bool xsk_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1) { return meta0->max_entries == meta1->max_entries && bpf_map_meta_equal(meta0, meta1); } BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) const struct bpf_map_ops xsk_map_ops = { .map_meta_equal = xsk_map_meta_equal, .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, .map_get_next_key = xsk_map_get_next_key, .map_lookup_elem = xsk_map_lookup_elem, .map_gen_lookup = xsk_map_gen_lookup, .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, .map_mem_usage = xsk_map_mem_usage, .map_btf_id = &xsk_map_btf_ids[0], .map_redirect = xsk_map_redirect, };
5311 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/net/dsa.h - Driver for Distributed Switch Architecture switch chips * Copyright (c) 2008-2009 Marvell Semiconductor */ #ifndef __LINUX_NET_DSA_H #define __LINUX_NET_DSA_H #include <linux/if.h> #include <linux/if_ether.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> #include <linux/ethtool.h> #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/platform_data/dsa.h> #include <linux/phylink.h> #include <net/devlink.h> #include <net/switchdev.h> struct dsa_8021q_context; struct tc_action; #define DSA_TAG_PROTO_NONE_VALUE 0 #define DSA_TAG_PROTO_BRCM_VALUE 1 #define DSA_TAG_PROTO_BRCM_PREPEND_VALUE 2 #define DSA_TAG_PROTO_DSA_VALUE 3 #define DSA_TAG_PROTO_EDSA_VALUE 4 #define DSA_TAG_PROTO_GSWIP_VALUE 5 #define DSA_TAG_PROTO_KSZ9477_VALUE 6 #define DSA_TAG_PROTO_KSZ9893_VALUE 7 #define DSA_TAG_PROTO_LAN9303_VALUE 8 #define DSA_TAG_PROTO_MTK_VALUE 9 #define DSA_TAG_PROTO_QCA_VALUE 10 #define DSA_TAG_PROTO_TRAILER_VALUE 11 #define DSA_TAG_PROTO_8021Q_VALUE 12 #define DSA_TAG_PROTO_SJA1105_VALUE 13 #define DSA_TAG_PROTO_KSZ8795_VALUE 14 #define DSA_TAG_PROTO_OCELOT_VALUE 15 #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 #define DSA_TAG_PROTO_HELLCREEK_VALUE 18 #define DSA_TAG_PROTO_XRS700X_VALUE 19 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20 #define DSA_TAG_PROTO_SEVILLE_VALUE 21 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE, DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, DSA_TAG_PROTO_GSWIP = DSA_TAG_PROTO_GSWIP_VALUE, DSA_TAG_PROTO_KSZ9477 = DSA_TAG_PROTO_KSZ9477_VALUE, DSA_TAG_PROTO_KSZ9893 = DSA_TAG_PROTO_KSZ9893_VALUE, DSA_TAG_PROTO_LAN9303 = DSA_TAG_PROTO_LAN9303_VALUE, DSA_TAG_PROTO_MTK = DSA_TAG_PROTO_MTK_VALUE, DSA_TAG_PROTO_QCA = DSA_TAG_PROTO_QCA_VALUE, DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE, DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE, DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, }; struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); int (*connect)(struct dsa_switch *ds); void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC * address, in which case the DSA conduit would drop packets on ingress * if what it understands out of the destination MAC address is not in * its RX filter. */ bool promisc_on_conduit; }; struct dsa_lag { struct net_device *dev; unsigned int id; struct mutex fdb_lock; struct list_head fdbs; refcount_t refcount; }; struct dsa_switch_tree { struct list_head list; /* List of switch ports */ struct list_head ports; /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; /* Tree identifier */ unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; /* Maps offloaded LAG netdevs to a zero-based linear ID for * drivers that need it. */ struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; /* Default tagging protocol preferred by the switches in this * tree. */ enum dsa_tag_protocol default_proto; /* Has this tree been applied to the hardware? */ bool setup; /* * Configuration data for the platform device that owns * this dsa switch tree instance. */ struct dsa_platform_data *pd; /* List of DSA links composing the routing table */ struct list_head rtable; /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ unsigned int last_switch; }; /* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, unsigned int id) { /* DSA LAG IDs are one-based, dst->lags is zero-based */ return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { struct dsa_lag *lag = dsa_lag_by_id(dst, id); if (lag->dev == lag_dev) return lag->id; } return -ENODEV; } /* TC matchall action types */ enum dsa_port_mall_action_type { DSA_PORT_MALL_MIRROR, DSA_PORT_MALL_POLICER, }; /* TC mirroring entry */ struct dsa_mall_mirror_tc_entry { u8 to_local_port; bool ingress; }; /* TC port policer entry */ struct dsa_mall_policer_tc_entry { u32 burst; u64 rate_bytes_per_sec; }; /* TC matchall entry */ struct dsa_mall_tc_entry { struct list_head list; unsigned long cookie; enum dsa_port_mall_action_type type; union { struct dsa_mall_mirror_tc_entry mirror; struct dsa_mall_policer_tc_entry policer; }; }; struct dsa_bridge { struct net_device *dev; unsigned int num; bool tx_fwd_offload; refcount_t refcount; }; struct dsa_port { /* A CPU port is physically connected to a conduit device. A user port * exposes a network device to user-space, called 'user' here. */ union { struct net_device *conduit; struct net_device *user; }; /* Copy of the tagging protocol operations, for quicker access * in the data path. Valid only for the CPU ports. */ const struct dsa_device_ops *tag_ops; /* Copies for faster access in conduit receive hot path */ struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); struct dsa_switch *ds; unsigned int index; enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, DSA_PORT_TYPE_DSA, DSA_PORT_TYPE_USER, } type; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; u8 stp_state; /* Warning: the following bit fields are not atomic, and updating them * can only be done from code paths where concurrency is not possible * (probe time or under rtnl_lock). */ u8 vlan_filtering:1; /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ u8 learning:1; u8 lag_tx_enabled:1; /* conduit state bits, valid only on CPU ports */ u8 conduit_admin_up:1; u8 conduit_oper_up:1; /* Valid only on user ports */ u8 cpu_port_in_lag:1; u8 setup:1; struct device_node *dn; unsigned int ageing_time; struct dsa_bridge *bridge; struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; /* * Original copy of the conduit netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; /* List of MAC addresses that must be forwarded on this port. * These are only valid on CPU ports and DSA links. */ struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; struct mutex vlans_lock; union { /* List of VLANs that CPU and DSA ports are members of. * Access to this is serialized by the sleepable @vlans_lock. */ struct list_head vlans; /* List of VLANs that user ports are members of. * Access to this is serialized by netif_addr_lock_bh(). */ struct list_head user_vlans; }; }; static inline struct dsa_port * dsa_phylink_to_port(struct phylink_config *config) { return container_of(config, struct dsa_port, pl_config); } /* TODO: ideally DSA ports would have a single dp->link_dp member, * and no dst->rtable nor this struct dsa_link would be needed, * but this would require some more complex tree walking, * so keep it stupid at the moment and list them all. */ struct dsa_link { struct dsa_port *dp; struct dsa_port *link_dp; struct list_head list; }; enum dsa_db_type { DSA_DB_PORT, DSA_DB_LAG, DSA_DB_BRIDGE, }; struct dsa_db { enum dsa_db_type type; union { const struct dsa_port *dp; struct dsa_lag lag; struct dsa_bridge bridge; }; }; struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; u16 vid; refcount_t refcount; struct list_head list; struct dsa_db db; }; struct dsa_vlan { u16 vid; refcount_t refcount; struct list_head list; }; struct dsa_switch { struct device *dev; /* * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; unsigned int index; /* Warning: the following bit fields are not atomic, and updating them * can only be done from code paths where concurrency is not possible * (probe time or under rtnl_lock). */ u32 setup:1; /* Disallow bridge core from requesting different VLAN awareness * settings on ports if not hardware-supported */ u32 vlan_filtering_is_global:1; /* Keep VLAN filtering enabled on ports not offloading any upper */ u32 needs_standalone_vlan_filtering:1; /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges * that have vlan_filtering=0. All drivers should ideally set this (and * then the option would get removed), but it is unknown whether this * would break things or not. */ u32 configure_vlan_while_not_filtering:1; /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. * DEPRECATED: Do NOT set this field in new drivers. Instead look at * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. * Useful if the switch cannot preserve the VLAN tag as seen on the * wire for user port ingress, and chooses to send all frames as * VLAN-tagged to the CPU, including those which were originally * untagged. */ u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. */ u32 assisted_learning_on_cpu_port:1; /* In case vlan_filtering_is_global is set, the VLAN awareness state * should be retrieved from here and not from the per-port settings. */ u32 vlan_filtering:1; /* For switches that only have the MRU configurable. To ensure the * configured MTU is not exceeded, normalization of MRU on all bridged * interfaces is needed. */ u32 mtu_enforcement_ingress:1; /* Drivers that isolate the FDBs of multiple bridges must set this * to true to receive the bridge as an argument in .port_fdb_{add,del} * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be * passed as zero. */ u32 fdb_isolation:1; /* Drivers that have global DSCP mapping settings must set this to * true to automatically apply the settings to all ports. */ u32 dscp_prio_mapping_is_global:1; /* Listener for switch fabric events */ struct notifier_block nb; /* * Give the switch driver somewhere to hang its private data * structure. */ void *priv; void *tagger_data; /* * Configuration data for this switch. */ struct dsa_chip_data *cd; /* * The switch operations. */ const struct dsa_switch_ops *ops; /* * Allow a DSA switch driver to override the phylink MAC ops */ const struct phylink_mac_ops *phylink_mac_ops; /* * User mii_bus and devices for the individual ports. */ u32 phys_mii_mask; struct mii_bus *user_mii_bus; /* Ageing Time limits in msecs */ unsigned int ageing_time_min; unsigned int ageing_time_max; /* Storage for drivers using tag_8021q */ struct dsa_8021q_context *tag_8021q_ctx; /* devlink used to represent this switch device */ struct devlink *devlink; /* Number of switch port queues */ unsigned int num_tx_queues; /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at * least_ these many IDs, accessible to drivers via * dsa_lag_id(). */ unsigned int num_lag_ids; /* Drivers that support bridge forwarding offload or FDB isolation * should set this to the maximum number of bridges spanning the same * switch tree (or all trees, in the case of cross-tree bridging * support) that can be offloaded. */ unsigned int max_num_bridges; unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dp->ds == ds && dp->index == p) return dp; return NULL; } static inline bool dsa_port_is_dsa(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_port_is_cpu(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_port_is_user(struct dsa_port *dp) { return dp->type == DSA_PORT_TYPE_USER; } static inline bool dsa_port_is_unused(struct dsa_port *dp) { return dp->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp) { return dsa_port_is_cpu(dp) && dp->conduit_admin_up && dp->conduit_oper_up; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } #define dsa_tree_for_each_user_port(_dp, _dst) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) #define dsa_tree_for_each_user_port_continue_reverse(_dp, _dst) \ list_for_each_entry_continue_reverse((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) #define dsa_tree_for_each_cpu_port(_dp, _dst) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_cpu((_dp))) #define dsa_switch_for_each_port(_dp, _ds) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_port_safe(_dp, _next, _ds) \ list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \ list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_available_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (!dsa_port_is_unused((_dp))) #define dsa_switch_for_each_user_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_user((_dp))) #define dsa_switch_for_each_user_port_continue_reverse(_dp, _ds) \ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ if (dsa_port_is_user((_dp))) #define dsa_switch_for_each_cpu_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) #define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) static inline u32 dsa_user_ports(struct dsa_switch *ds) { struct dsa_port *dp; u32 mask = 0; dsa_switch_for_each_user_port(dp, ds) mask |= BIT(dp->index); return mask; } static inline u32 dsa_cpu_ports(struct dsa_switch *ds) { struct dsa_port *cpu_dp; u32 mask = 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) mask |= BIT(cpu_dp->index); return mask; } /* Return the local port used to reach an arbitrary switch device */ static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device) { struct dsa_switch_tree *dst = ds->dst; struct dsa_link *dl; list_for_each_entry(dl, &dst->rtable, list) if (dl->dp->ds == ds && dl->link_dp->ds->index == device) return dl->dp->index; return ds->num_ports; } /* Return the local port used to reach an arbitrary switch port */ static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, int port) { if (device == ds->index) return port; else return dsa_routing_port(ds, device); } /* Return the local port used to reach the dedicated CPU port */ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) { const struct dsa_port *dp = dsa_to_port(ds, port); const struct dsa_port *cpu_dp = dp->cpu_dp; if (!cpu_dp) return port; return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } /* Return true if this is the local port used to reach the CPU port */ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) { if (dsa_is_unused_port(ds, port)) return false; return port == dsa_upstream_port(ds, port); } /* Return true if this is a DSA port leading away from the CPU */ static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) { return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); } /* Return the local port used to reach the CPU port */ static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) { struct dsa_port *dp; dsa_switch_for_each_available_port(dp, ds) { return dsa_upstream_port(ds, dp->index); } return ds->num_ports; } /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. */ static inline bool dsa_switch_is_upstream_of(struct dsa_switch *upstream_ds, struct dsa_switch *downstream_ds) { int routing_port; if (upstream_ds == downstream_ds) return true; routing_port = dsa_routing_port(downstream_ds, upstream_ds->index); return dsa_is_upstream_port(downstream_ds, routing_port); } static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) { const struct dsa_switch *ds = dp->ds; if (ds->vlan_filtering_is_global) return ds->vlan_filtering; else return dp->vlan_filtering; } static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) { return dp->lag ? dp->lag->id : 0; } static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) { return dp->lag ? dp->lag->dev : NULL; } static inline bool dsa_port_offloads_lag(struct dsa_port *dp, const struct dsa_lag *lag) { return dsa_port_lag_dev_get(dp) == lag->dev; } static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp) { if (dp->cpu_port_in_lag) return dsa_port_lag_dev_get(dp->cpu_dp); return dp->cpu_dp->conduit; } static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { if (!dp->bridge) return NULL; if (dp->lag) return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; return dp->user; } static inline struct net_device * dsa_port_bridge_dev_get(const struct dsa_port *dp) { return dp->bridge ? dp->bridge->dev : NULL; } static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) { return dp->bridge ? dp->bridge->num : 0; } static inline bool dsa_port_bridge_same(const struct dsa_port *a, const struct dsa_port *b) { struct net_device *br_a = dsa_port_bridge_dev_get(a); struct net_device *br_b = dsa_port_bridge_dev_get(b); /* Standalone ports are not in the same bridge with one another */ return (!br_a || !br_b) ? false : (br_a == br_b); } static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, const struct net_device *dev) { return dsa_port_to_bridge_port(dp) == dev; } static inline bool dsa_port_offloads_bridge_dev(struct dsa_port *dp, const struct net_device *bridge_dev) { /* DSA ports connected to a bridge, and event was emitted * for the bridge. */ return dsa_port_bridge_dev_get(dp) == bridge_dev; } static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, const struct dsa_bridge *bridge) { return dsa_port_bridge_dev_get(dp) == bridge->dev; } /* Returns true if any port of this tree offloads the given net_device */ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, const struct net_device *dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dsa_port_offloads_bridge_port(dp, dev)) return true; return false; } /* Returns true if any port of this tree offloads the given bridge */ static inline bool dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, const struct net_device *bridge_dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) return true; return false; } static inline bool dsa_port_tree_same(const struct dsa_port *a, const struct dsa_port *b) { return a->ds->dst == b->ds->dst; } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { /* * Tagging protocol helpers called for the CPU ports and DSA links. * @get_tag_protocol retrieves the initial tagging protocol and is * mandatory. Switches which can operate using multiple tagging * protocols should implement @change_tag_protocol and report in * @get_tag_protocol the tagger in current use. */ enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); int (*change_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); /* * Method for switch drivers to connect to the tagging protocol driver * in current use. The switch driver can provide handlers for certain * types of packets for switch management. */ int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); int (*port_change_conduit)(struct dsa_switch *ds, int port, struct net_device *conduit, struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); /* Per-port initialization and destruction methods. Mandatory if the * driver registers devlink port regions, optional otherwise. */ int (*port_setup)(struct dsa_switch *ds, int port); void (*port_teardown)(struct dsa_switch *ds, int port); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* * Access to the switch's PHY registers. */ int (*phy_read)(struct dsa_switch *ds, int port, int regnum); int (*phy_write)(struct dsa_switch *ds, int port, int regnum, u16 val); /* * PHYLINK integration */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); void (*phylink_mac_link_up)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, struct phy_device *phydev, int speed, int duplex, bool tx_pause, bool rx_pause); void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* * Port statistics counters. */ void (*get_strings)(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data); void (*get_ethtool_stats)(struct dsa_switch *ds, int port, uint64_t *data); int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); void (*get_eth_phy_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_mac_stats *mac_stats); void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats); void (*get_rmon_stats)(struct dsa_switch *ds, int port, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); void (*get_pause_stats)(struct dsa_switch *ds, int port, struct ethtool_pause_stats *pause_stats); void (*self_test)(struct dsa_switch *ds, int port, struct ethtool_test *etest, u64 *data); /* * ethtool Wake-on-LAN */ void (*get_wol)(struct dsa_switch *ds, int port, struct ethtool_wolinfo *w); int (*set_wol)(struct dsa_switch *ds, int port, struct ethtool_wolinfo *w); /* * ethtool timestamp info */ int (*get_ts_info)(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_info *ts); /* * ethtool MAC merge layer */ int (*get_mm)(struct dsa_switch *ds, int port, struct ethtool_mm_state *state); int (*set_mm)(struct dsa_switch *ds, int port, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack); void (*get_mm_stats)(struct dsa_switch *ds, int port, struct ethtool_mm_stats *stats); /* * DCB ops */ int (*port_get_default_prio)(struct dsa_switch *ds, int port); int (*port_set_default_prio)(struct dsa_switch *ds, int port, u8 prio); int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, u8 prio); int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, u8 prio); int (*port_set_apptrust)(struct dsa_switch *ds, int port, const u8 *sel, int nsel); int (*port_get_apptrust)(struct dsa_switch *ds, int port, u8 *sel, int *nsel); /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); int (*resume)(struct dsa_switch *ds); /* * Port enable/disable */ int (*port_enable)(struct dsa_switch *ds, int port, struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); /* * Notification for MAC address changes on user ports. Drivers can * currently only veto operations. They should not use the method to * program the hardware, since the operation is not rolled back in case * of other errors. */ int (*port_set_mac_address)(struct dsa_switch *ds, int port, const unsigned char *addr); /* * Compatibility between device trees defining multiple CPU ports and * drivers which are not OK to use by default the numerically smallest * CPU port of a switch for its local ports. This can return NULL, * meaning "don't know/don't care". */ struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); /* * Port's MAC EEE settings */ int (*set_mac_eee)(struct dsa_switch *ds, int port, struct ethtool_keee *e); int (*get_mac_eee)(struct dsa_switch *ds, int port, struct ethtool_keee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); int (*get_eeprom)(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data); int (*set_eeprom)(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data); /* * Register access. */ int (*get_regs_len)(struct dsa_switch *ds, int port); void (*get_regs)(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *p); /* * Upper device tracking. */ int (*port_prechangeupper)(struct dsa_switch *ds, int port, struct netdev_notifier_changeupper_info *info); /* * Bridge integration */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); int (*port_mst_state_set)(struct dsa_switch *ds, int port, const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); void (*port_set_host_flood)(struct dsa_switch *ds, int port, bool uc, bool mc); /* * VLAN support */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack); int (*port_vlan_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge, const struct switchdev_vlan_msti *msti); /* * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, const unsigned char *addr, u16 vid, struct dsa_db db); int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, const unsigned char *addr, u16 vid, struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); /* * RXNFC */ int (*get_rxnfc)(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc, u32 *rule_locs); int (*set_rxnfc)(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc); /* * TC integration */ int (*cls_flower_add)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*cls_flower_del)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*cls_flower_stats)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, struct dsa_mall_policer_tc_entry *policer); void (*port_policer_del)(struct dsa_switch *ds, int port); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); /* * Cross-chip operations */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge, struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag, struct netdev_lag_upper_info *info, struct netlink_ext_ack *extack); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag); /* * PTP functionality */ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, struct ifreq *ifr); int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, struct ifreq *ifr); void (*port_txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); /* Devlink parameters, etc */ int (*devlink_param_get)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int (*devlink_param_set)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int (*devlink_info_get)(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); int (*devlink_sb_pool_get)(struct dsa_switch *ds, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); int (*devlink_sb_pool_set)(struct dsa_switch *ds, unsigned int sb_index, u16 pool_index, u32 size, enum devlink_sb_threshold_type threshold_type, struct netlink_ext_ack *extack); int (*devlink_sb_port_pool_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 *p_threshold); int (*devlink_sb_port_pool_set)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack); int (*devlink_sb_tc_pool_bind_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u16 *p_pool_index, u32 *p_threshold); int (*devlink_sb_tc_pool_bind_set)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack); int (*devlink_sb_occ_snapshot)(struct dsa_switch *ds, unsigned int sb_index); int (*devlink_sb_occ_max_clear)(struct dsa_switch *ds, unsigned int sb_index); int (*devlink_sb_occ_port_pool_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 *p_cur, u32 *p_max); int (*devlink_sb_occ_tc_port_bind_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); /* * MTU change functionality. Switches can also adjust their MRU through * this method. By MTU, one understands the SDU (L2 payload) length. * If the switch needs to account for the DSA tag on the CPU port, this * method needs to do so privately. */ int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); /* * LAG integration */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, struct dsa_lag lag, struct netdev_lag_upper_info *info, struct netlink_ext_ack *extack); int (*port_lag_leave)(struct dsa_switch *ds, int port, struct dsa_lag lag); /* * HSR integration */ int (*port_hsr_join)(struct dsa_switch *ds, int port, struct net_device *hsr, struct netlink_ext_ack *extack); int (*port_hsr_leave)(struct dsa_switch *ds, int port, struct net_device *hsr); /* * MRP integration */ int (*port_mrp_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_mrp *mrp); int (*port_mrp_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_mrp *mrp); int (*port_mrp_add_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); /* * tag_8021q operations */ int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); /* * DSA conduit tracking operations */ void (*conduit_state_change)(struct dsa_switch *ds, const struct net_device *conduit, bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, \ dsa_devlink_param_get, dsa_devlink_param_set, NULL) int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx); int dsa_devlink_param_set(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack); int dsa_devlink_params_register(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); void dsa_devlink_params_unregister(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); int dsa_devlink_resource_register(struct dsa_switch *ds, const char *resource_name, u64 resource_size, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); void dsa_devlink_resources_unregister(struct dsa_switch *ds); void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv); void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, u64 resource_id); struct devlink_region * dsa_devlink_region_create(struct dsa_switch *ds, const struct devlink_region_ops *ops, u32 region_max_snapshots, u64 region_size); struct devlink_region * dsa_devlink_port_region_create(struct dsa_switch *ds, int port, const struct devlink_port_region_ops *ops, u32 region_max_snapshots, u64 region_size); void dsa_devlink_region_destroy(struct devlink_region *region); struct dsa_port *dsa_port_from_netdev(struct net_device *netdev); struct dsa_devlink_priv { struct dsa_switch *ds; }; static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl) { struct dsa_devlink_priv *dl_priv = devlink_priv(dl); return dl_priv->ds; } static inline struct dsa_switch *dsa_devlink_port_to_ds(struct devlink_port *port) { struct devlink *dl = port->devlink; struct dsa_devlink_priv *dl_priv = devlink_priv(dl); return dl_priv->ds; } static inline int dsa_devlink_port_to_port(struct devlink_port *port) { return port->index; } struct dsa_switch_driver { struct list_head list; const struct dsa_switch_ops *ops; }; bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { #if IS_ENABLED(CONFIG_NET_DSA) return dev->dsa_ptr && dev->dsa_ptr->rcv; #endif return false; } /* All DSA tags that push the EtherType to the right (basically all except tail * tags, which don't break dissection) can be treated the same from the * perspective of the flow dissector. * * We need to return: * - offset: the (B - A) difference between: * A. the position of the real EtherType and * B. the current skb->data (aka ETH_HLEN bytes into the frame, aka 2 bytes * after the normal EtherType was supposed to be) * The offset in bytes is exactly equal to the tagger overhead (and half of * that, in __be16 shorts). * * - proto: the value of the real EtherType. */ static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb, __be16 *proto, int *offset) { #if IS_ENABLED(CONFIG_NET_DSA) const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops; int tag_len = ops->needed_headroom; *offset = tag_len; *proto = ((__be16 *)skb->data)[(tag_len / 2) - 1]; #endif } void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); void dsa_flush_workqueue(void); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); #else static inline int dsa_switch_suspend(struct dsa_switch *ds) { return 0; } static inline int dsa_switch_resume(struct dsa_switch *ds) { return 0; } #endif /* CONFIG_PM_SLEEP */ #if IS_ENABLED(CONFIG_NET_DSA) bool dsa_user_dev_check(const struct net_device *dev); #else static inline bool dsa_user_dev_check(const struct net_device *dev) { return false; } #endif netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); #endif
1 1 1 1 12 12 12 2 12 21 22 2 19 1 2 2 1 16 20 86 15 108 1 1 31 80 60 1 60 60 60 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 // SPDX-License-Identifier: GPL-2.0 #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/magic.h> #include <linux/mount.h> #include <linux/pid.h> #include <linux/pidfs.h> #include <linux/pid_namespace.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> #include <linux/ipc_namespace.h> #include <linux/time_namespace.h> #include <linux/utsname.h> #include <net/net_namespace.h> #include "internal.h" #include "mount.h" #ifdef CONFIG_PROC_FS /** * pidfd_show_fdinfo - print information about a pidfd * @m: proc fdinfo file * @f: file referencing a pidfd * * Pid: * This function will print the pid that a given pidfd refers to in the * pid namespace of the procfs instance. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance 0 will be shown as its pid. This is * similar to calling getppid() on a process whose parent is outside of * its pid namespace. * * NSpid: * If pid namespaces are supported then this function will also print * the pid of a given pidfd refers to for all descendant pid namespaces * starting from the current pid namespace of the instance, i.e. the * Pid field and the first entry in the NSpid field will be identical. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance 0 will be shown as its first NSpid * entry and no others will be shown. * Note that this differs from the Pid and NSpid fields in * /proc/<pid>/status where Pid and NSpid are always shown relative to * the pid namespace of the procfs instance. The difference becomes * obvious when sending around a pidfd between pid namespaces from a * different branch of the tree, i.e. where no ancestral relation is * present between the pid namespaces: * - create two new pid namespaces ns1 and ns2 in the initial pid * namespace (also take care to create new mount namespaces in the * new pid namespace and mount procfs) * - create a process with a pidfd in ns1 * - send pidfd from ns1 to ns2 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid * have exactly one entry, which is 0 */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { struct pid *pid = pidfd_pid(f); struct pid_namespace *ns; pid_t nr = -1; if (likely(pid_has_task(pid, PIDTYPE_PID))) { ns = proc_pid_ns(file_inode(m->file)->i_sb); nr = pid_nr_ns(pid, ns); } seq_put_decimal_ll(m, "Pid:\t", nr); #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { int i; /* If nr is non-zero it means that 'pid' is valid and that * ns, i.e. the pid namespace associated with the procfs * instance, is in the pid namespace hierarchy of pid. * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n'); } #endif /* * Poll support for process exit notification. */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = pidfd_pid(file); bool thread = file->f_flags & PIDFD_THREAD; struct task_struct *task; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); /* * Depending on PIDFD_THREAD, inform pollers when the thread * or the whole thread-group exits. */ guard(rcu)(); task = pid_task(pid, PIDTYPE_PID); if (!task) poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; else if (task->exit_state && (thread || thread_group_empty(task))) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; } static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; struct nsproxy *nsp __free(put_nsproxy) = NULL; struct pid *pid = pidfd_pid(file); struct ns_common *ns_common = NULL; if (arg) return -EINVAL; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) get_nsproxy(nsp); } if (!nsp) return -ESRCH; /* just pretend it didn't exist */ /* * We're trying to open a file descriptor to the namespace so perform a * filesystem cred ptrace check. Also, we mirror nsfs behavior. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) return -EACCES; switch (cmd) { /* Namespaces that hang of nsproxy. */ case PIDFD_GET_CGROUP_NAMESPACE: if (IS_ENABLED(CONFIG_CGROUPS)) { get_cgroup_ns(nsp->cgroup_ns); ns_common = to_ns_common(nsp->cgroup_ns); } break; case PIDFD_GET_IPC_NAMESPACE: if (IS_ENABLED(CONFIG_IPC_NS)) { get_ipc_ns(nsp->ipc_ns); ns_common = to_ns_common(nsp->ipc_ns); } break; case PIDFD_GET_MNT_NAMESPACE: get_mnt_ns(nsp->mnt_ns); ns_common = to_ns_common(nsp->mnt_ns); break; case PIDFD_GET_NET_NAMESPACE: if (IS_ENABLED(CONFIG_NET_NS)) { ns_common = to_ns_common(nsp->net_ns); get_net_ns(ns_common); } break; case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { get_pid_ns(nsp->pid_ns_for_children); ns_common = to_ns_common(nsp->pid_ns_for_children); } break; case PIDFD_GET_TIME_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns); ns_common = to_ns_common(nsp->time_ns); } break; case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns_for_children); ns_common = to_ns_common(nsp->time_ns_for_children); } break; case PIDFD_GET_UTS_NAMESPACE: if (IS_ENABLED(CONFIG_UTS_NS)) { get_uts_ns(nsp->uts_ns); ns_common = to_ns_common(nsp->uts_ns); } break; /* Namespaces that don't hang of nsproxy. */ case PIDFD_GET_USER_NAMESPACE: if (IS_ENABLED(CONFIG_USER_NS)) { rcu_read_lock(); ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); rcu_read_unlock(); } break; case PIDFD_GET_PID_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { rcu_read_lock(); ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task))); rcu_read_unlock(); } break; default: return -ENOIOCTLCMD; } if (!ns_common) return -EOPNOTSUPP; /* open_namespace() unconditionally consumes the reference */ return open_namespace(ns_common); } static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif .unlocked_ioctl = pidfd_ioctl, .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) { if (file->f_op != &pidfs_file_operations) return ERR_PTR(-EBADF); return file_inode(file)->i_private; } static struct vfsmount *pidfs_mnt __ro_after_init; #if BITS_PER_LONG == 32 /* * Provide a fallback mechanism for 32-bit systems so processes remain * reliably comparable by inode number even on those systems. */ static DEFINE_IDA(pidfd_inum_ida); static int pidfs_inum(struct pid *pid, unsigned long *ino) { int ret; ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1, UINT_MAX, GFP_ATOMIC); if (ret < 0) return -ENOSPC; *ino = ret; return 0; } static inline void pidfs_free_inum(unsigned long ino) { if (ino > 0) ida_free(&pidfd_inum_ida, ino); } #else static inline int pidfs_inum(struct pid *pid, unsigned long *ino) { *ino = pid->ino; return 0; } #define pidfs_free_inum(ino) ((void)(ino)) #endif /* * The vfs falls back to simple_setattr() if i_op->setattr() isn't * implemented. Let's reject it completely until we have a clean * permission concept for pidfds. */ static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } /* * User space expects pidfs inodes to have no file type in st_mode. * * In particular, 'lsof' has this legacy logic: * * type = s->st_mode & S_IFMT; * switch (type) { * ... * case 0: * if (!strcmp(p, "anon_inode")) * Lf->ntype = Ntype = N_ANON_INODE; * * to detect our old anon_inode logic. * * Rather than mess with our internal sane inode data, just fix it * up here in getattr() by masking off the format bits. */ static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->mode &= ~S_IFMT; return 0; } static const struct inode_operations pidfs_inode_operations = { .getattr = pidfs_getattr, .setattr = pidfs_setattr, }; static void pidfs_evict_inode(struct inode *inode) { struct pid *pid = inode->i_private; clear_inode(inode); put_pid(pid); pidfs_free_inum(inode->i_ino); } static const struct super_operations pidfs_sops = { .drop_inode = generic_delete_inode, .evict_inode = pidfs_evict_inode, .statfs = simple_statfs, }; /* * 'lsof' has knowledge of out historical anon_inode use, and expects * the pidfs dentry name to start with 'anon_inode'. */ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]"); } static const struct dentry_operations pidfs_dentry_operations = { .d_delete = always_delete_dentry, .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; static int pidfs_init_inode(struct inode *inode, void *data) { inode->i_private = data; inode->i_flags |= S_PRIVATE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; /* * Inode numbering for pidfs start at RESERVED_PIDS + 1. This * avoids collisions with the root inode which is 1 for pseudo * filesystems. */ return pidfs_inum(data, &inode->i_ino); } static void pidfs_put_data(void *data) { struct pid *pid = data; put_pid(pid); } static const struct stashed_operations pidfs_stashed_ops = { .init_inode = pidfs_init_inode, .put_data = pidfs_put_data, }; static int pidfs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx; ctx = init_pseudo(fc, PID_FS_MAGIC); if (!ctx) return -ENOMEM; ctx->ops = &pidfs_sops; ctx->dops = &pidfs_dentry_operations; fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0; } static struct file_system_type pidfs_type = { .name = "pidfs", .init_fs_context = pidfs_init_fs_context, .kill_sb = kill_anon_super, }; struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) { struct file *pidfd_file; struct path path; int ret; ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); if (ret < 0) return ERR_PTR(ret); pidfd_file = dentry_open(&path, flags, current_cred()); path_put(&path); return pidfd_file; } void __init pidfs_init(void) { pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); }
19 13 25 25 23 12 23 24 3 25 25 12 21 14 25 25 22 25 25 59 13 13 59 59 29 4 7 25 26 24 25 1 21 22 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 // SPDX-License-Identifier: GPL-2.0-or-later /* * linux/fs/fat/cache.c * * Written 1992,1993 by Werner Almesberger * * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead * of inode number. * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ #include <linux/slab.h> #include <asm/unaligned.h> #include <linux/buffer_head.h> #include "exfat_raw.h" #include "exfat_fs.h" #define EXFAT_MAX_CACHE 16 struct exfat_cache { struct list_head cache_list; unsigned int nr_contig; /* number of contiguous clusters */ unsigned int fcluster; /* cluster number in the file. */ unsigned int dcluster; /* cluster number on disk. */ }; struct exfat_cache_id { unsigned int id; unsigned int nr_contig; unsigned int fcluster; unsigned int dcluster; }; static struct kmem_cache *exfat_cachep; static void exfat_cache_init_once(void *c) { struct exfat_cache *cache = (struct exfat_cache *)c; INIT_LIST_HEAD(&cache->cache_list); } int exfat_cache_init(void) { exfat_cachep = kmem_cache_create("exfat_cache", sizeof(struct exfat_cache), 0, SLAB_RECLAIM_ACCOUNT, exfat_cache_init_once); if (!exfat_cachep) return -ENOMEM; return 0; } void exfat_cache_shutdown(void) { if (!exfat_cachep) return; kmem_cache_destroy(exfat_cachep); } static inline struct exfat_cache *exfat_cache_alloc(void) { return kmem_cache_alloc(exfat_cachep, GFP_NOFS); } static inline void exfat_cache_free(struct exfat_cache *cache) { WARN_ON(!list_empty(&cache->cache_list)); kmem_cache_free(exfat_cachep, cache); } static inline void exfat_cache_update_lru(struct inode *inode, struct exfat_cache *cache) { struct exfat_inode_info *ei = EXFAT_I(inode); if (ei->cache_lru.next != &cache->cache_list) list_move(&cache->cache_list, &ei->cache_lru); } static unsigned int exfat_cache_lookup(struct inode *inode, unsigned int fclus, struct exfat_cache_id *cid, unsigned int *cached_fclus, unsigned int *cached_dclus) { struct exfat_inode_info *ei = EXFAT_I(inode); static struct exfat_cache nohit = { .fcluster = 0, }; struct exfat_cache *hit = &nohit, *p; unsigned int offset = EXFAT_EOF_CLUSTER; spin_lock(&ei->cache_lru_lock); list_for_each_entry(p, &ei->cache_lru, cache_list) { /* Find the cache of "fclus" or nearest cache. */ if (p->fcluster <= fclus && hit->fcluster < p->fcluster) { hit = p; if (hit->fcluster + hit->nr_contig < fclus) { offset = hit->nr_contig; } else { offset = fclus - hit->fcluster; break; } } } if (hit != &nohit) { exfat_cache_update_lru(inode, hit); cid->id = ei->cache_valid_id; cid->nr_contig = hit->nr_contig; cid->fcluster = hit->fcluster; cid->dcluster = hit->dcluster; *cached_fclus = cid->fcluster + offset; *cached_dclus = cid->dcluster + offset; } spin_unlock(&ei->cache_lru_lock); return offset; } static struct exfat_cache *exfat_cache_merge(struct inode *inode, struct exfat_cache_id *new) { struct exfat_inode_info *ei = EXFAT_I(inode); struct exfat_cache *p; list_for_each_entry(p, &ei->cache_lru, cache_list) { /* Find the same part as "new" in cluster-chain. */ if (p->fcluster == new->fcluster) { if (new->nr_contig > p->nr_contig) p->nr_contig = new->nr_contig; return p; } } return NULL; } static void exfat_cache_add(struct inode *inode, struct exfat_cache_id *new) { struct exfat_inode_info *ei = EXFAT_I(inode); struct exfat_cache *cache, *tmp; if (new->fcluster == EXFAT_EOF_CLUSTER) /* dummy cache */ return; spin_lock(&ei->cache_lru_lock); if (new->id != EXFAT_CACHE_VALID && new->id != ei->cache_valid_id) goto unlock; /* this cache was invalidated */ cache = exfat_cache_merge(inode, new); if (cache == NULL) { if (ei->nr_caches < EXFAT_MAX_CACHE) { ei->nr_caches++; spin_unlock(&ei->cache_lru_lock); tmp = exfat_cache_alloc(); if (!tmp) { spin_lock(&ei->cache_lru_lock); ei->nr_caches--; spin_unlock(&ei->cache_lru_lock); return; } spin_lock(&ei->cache_lru_lock); cache = exfat_cache_merge(inode, new); if (cache != NULL) { ei->nr_caches--; exfat_cache_free(tmp); goto out_update_lru; } cache = tmp; } else { struct list_head *p = ei->cache_lru.prev; cache = list_entry(p, struct exfat_cache, cache_list); } cache->fcluster = new->fcluster; cache->dcluster = new->dcluster; cache->nr_contig = new->nr_contig; } out_update_lru: exfat_cache_update_lru(inode, cache); unlock: spin_unlock(&ei->cache_lru_lock); } /* * Cache invalidation occurs rarely, thus the LRU chain is not updated. It * fixes itself after a while. */ static void __exfat_cache_inval_inode(struct inode *inode) { struct exfat_inode_info *ei = EXFAT_I(inode); struct exfat_cache *cache; while (!list_empty(&ei->cache_lru)) { cache = list_entry(ei->cache_lru.next, struct exfat_cache, cache_list); list_del_init(&cache->cache_list); ei->nr_caches--; exfat_cache_free(cache); } /* Update. The copy of caches before this id is discarded. */ ei->cache_valid_id++; if (ei->cache_valid_id == EXFAT_CACHE_VALID) ei->cache_valid_id++; } void exfat_cache_inval_inode(struct inode *inode) { struct exfat_inode_info *ei = EXFAT_I(inode); spin_lock(&ei->cache_lru_lock); __exfat_cache_inval_inode(inode); spin_unlock(&ei->cache_lru_lock); } static inline int cache_contiguous(struct exfat_cache_id *cid, unsigned int dclus) { cid->nr_contig++; return cid->dcluster + cid->nr_contig == dclus; } static inline void cache_init(struct exfat_cache_id *cid, unsigned int fclus, unsigned int dclus) { cid->id = EXFAT_CACHE_VALID; cid->fcluster = fclus; cid->dcluster = dclus; cid->nr_contig = 0; } int exfat_get_cluster(struct inode *inode, unsigned int cluster, unsigned int *fclus, unsigned int *dclus, unsigned int *last_dclus, int allow_eof) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); unsigned int limit = sbi->num_clusters; struct exfat_inode_info *ei = EXFAT_I(inode); struct exfat_cache_id cid; unsigned int content; if (ei->start_clu == EXFAT_FREE_CLUSTER) { exfat_fs_error(sb, "invalid access to exfat cache (entry 0x%08x)", ei->start_clu); return -EIO; } *fclus = 0; *dclus = ei->start_clu; *last_dclus = *dclus; /* * Don`t use exfat_cache if zero offset or non-cluster allocation */ if (cluster == 0 || *dclus == EXFAT_EOF_CLUSTER) return 0; cache_init(&cid, EXFAT_EOF_CLUSTER, EXFAT_EOF_CLUSTER); if (exfat_cache_lookup(inode, cluster, &cid, fclus, dclus) == EXFAT_EOF_CLUSTER) { /* * dummy, always not contiguous * This is reinitialized by cache_init(), later. */ WARN_ON(cid.id != EXFAT_CACHE_VALID || cid.fcluster != EXFAT_EOF_CLUSTER || cid.dcluster != EXFAT_EOF_CLUSTER || cid.nr_contig != 0); } if (*fclus == cluster) return 0; while (*fclus < cluster) { /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { exfat_fs_error(sb, "detected the cluster chain loop (i_pos %u)", (*fclus)); return -EIO; } if (exfat_ent_get(sb, *dclus, &content)) return -EIO; *last_dclus = *dclus; *dclus = content; (*fclus)++; if (content == EXFAT_EOF_CLUSTER) { if (!allow_eof) { exfat_fs_error(sb, "invalid cluster chain (i_pos %u, last_clus 0x%08x is EOF)", *fclus, (*last_dclus)); return -EIO; } break; } if (!cache_contiguous(&cid, *dclus)) cache_init(&cid, *fclus, *dclus); } exfat_cache_add(inode, &cid); return 0; }
306 306 101 102 15 15 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 // SPDX-License-Identifier: GPL-2.0-or-later /* * cn_proc.c - process events connector * * Copyright (C) Matt Helsley, IBM Corp. 2005 * Based on cn_fork.c by Guillaume Thouvenin <guillaume.thouvenin@bull.net> * Original copyright notice follows: * Copyright (C) 2005 BULL SA. */ #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/init.h> #include <linux/connector.h> #include <linux/gfp.h> #include <linux/ptrace.h> #include <linux/atomic.h> #include <linux/pid_namespace.h> #include <linux/cn_proc.h> #include <linux/local_lock.h> /* * Size of a cn_msg followed by a proc_event structure. Since the * sizeof struct cn_msg is a multiple of 4 bytes, but not 8 bytes, we * add one 4-byte word to the size here, and then start the actual * cn_msg structure 4 bytes into the stack buffer. The result is that * the immediately following proc_event structure is aligned to 8 bytes. */ #define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event) + 4) /* See comment above; we test our assumption about sizeof struct cn_msg here. */ static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer) { BUILD_BUG_ON(sizeof(struct cn_msg) != 20); return (struct cn_msg *)(buffer + 4); } static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; /* local_event.count is used as the sequence number of the netlink message */ struct local_event { local_lock_t lock; __u32 count; }; static DEFINE_PER_CPU(struct local_event, local_event) = { .lock = INIT_LOCAL_LOCK(lock), }; static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data) { __u32 what, exit_code, *ptr; enum proc_cn_mcast_op mc_op; uintptr_t val; if (!dsk || !dsk->sk_user_data || !data) return 0; ptr = (__u32 *)data; what = *ptr++; exit_code = *ptr; val = ((struct proc_input *)(dsk->sk_user_data))->event_type; mc_op = ((struct proc_input *)(dsk->sk_user_data))->mcast_op; if (mc_op == PROC_CN_MCAST_IGNORE) return 1; if ((__u32)val == PROC_EVENT_ALL) return 0; /* * Drop packet if we have to report only non-zero exit status * (PROC_EVENT_NONZERO_EXIT) and exit status is 0 */ if (((__u32)val & PROC_EVENT_NONZERO_EXIT) && (what == PROC_EVENT_EXIT)) { if (exit_code) return 0; } if ((__u32)val & what) return 0; return 1; } static inline void send_msg(struct cn_msg *msg) { __u32 filter_data[2]; local_lock(&local_event.lock); msg->seq = __this_cpu_inc_return(local_event.count) - 1; ((struct proc_event *)msg->data)->cpu = smp_processor_id(); /* * local_lock() disables preemption during send to ensure the messages * are ordered according to their sequence numbers. * * If cn_netlink_send() fails, the data is not sent. */ filter_data[0] = ((struct proc_event *)msg->data)->what; if (filter_data[0] == PROC_EVENT_EXIT) { filter_data[1] = ((struct proc_event *)msg->data)->event_data.exit.exit_code; } else { filter_data[1] = 0; } cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, cn_filter, (void *)filter_data); local_unlock(&local_event.lock); } void proc_fork_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct task_struct *parent; if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_FORK; rcu_read_lock(); parent = rcu_dereference(task->real_parent); ev->event_data.fork.parent_pid = parent->pid; ev->event_data.fork.parent_tgid = parent->tgid; rcu_read_unlock(); ev->event_data.fork.child_pid = task->pid; ev->event_data.fork.child_tgid = task->tgid; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_exec_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; ev->event_data.exec.process_tgid = task->tgid; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_id_connector(struct task_struct *task, int which_id) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; rcu_read_lock(); cred = __task_cred(task); if (which_id == PROC_EVENT_UID) { ev->event_data.id.r.ruid = from_kuid_munged(&init_user_ns, cred->uid); ev->event_data.id.e.euid = from_kuid_munged(&init_user_ns, cred->euid); } else if (which_id == PROC_EVENT_GID) { ev->event_data.id.r.rgid = from_kgid_munged(&init_user_ns, cred->gid); ev->event_data.id.e.egid = from_kgid_munged(&init_user_ns, cred->egid); } else { rcu_read_unlock(); return; } rcu_read_unlock(); ev->timestamp_ns = ktime_get_ns(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_sid_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; ev->event_data.sid.process_tgid = task->tgid; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_ptrace_connector(struct task_struct *task, int ptrace_id) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; ev->event_data.ptrace.process_tgid = task->tgid; if (ptrace_id == PTRACE_ATTACH) { ev->event_data.ptrace.tracer_pid = current->pid; ev->event_data.ptrace.tracer_tgid = current->tgid; } else if (ptrace_id == PTRACE_DETACH) { ev->event_data.ptrace.tracer_pid = 0; ev->event_data.ptrace.tracer_tgid = 0; } else return; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_comm_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; ev->event_data.comm.process_tgid = task->tgid; get_task_comm(ev->event_data.comm.comm, task); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_coredump_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; struct task_struct *parent; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COREDUMP; ev->event_data.coredump.process_pid = task->pid; ev->event_data.coredump.process_tgid = task->tgid; rcu_read_lock(); if (pid_alive(task)) { parent = rcu_dereference(task->real_parent); ev->event_data.coredump.parent_pid = parent->pid; ev->event_data.coredump.parent_tgid = parent->tgid; } rcu_read_unlock(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; struct task_struct *parent; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; ev->event_data.exit.process_tgid = task->tgid; ev->event_data.exit.exit_code = task->exit_code; ev->event_data.exit.exit_signal = task->exit_signal; rcu_read_lock(); if (pid_alive(task)) { parent = rcu_dereference(task->real_parent); ev->event_data.exit.parent_pid = parent->pid; ev->event_data.exit.parent_tgid = parent->tgid; } rcu_read_unlock(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } /* * Send an acknowledgement message to userspace * * Use 0 for success, EFOO otherwise. * Note: this is the negative of conventional kernel error * values because it's not being returned via syscall return * mechanisms. */ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) { struct cn_msg *msg; struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); msg->seq = rcvd_seq; ev->timestamp_ns = ktime_get_ns(); ev->cpu = -1; ev->what = PROC_EVENT_NONE; ev->event_data.ack.err = err; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); msg->flags = 0; /* not used */ send_msg(msg); } /** * cn_proc_mcast_ctl * @msg: message sent from userspace via the connector * @nsp: NETLINK_CB of the client's socket buffer */ static void cn_proc_mcast_ctl(struct cn_msg *msg, struct netlink_skb_parms *nsp) { enum proc_cn_mcast_op mc_op = 0, prev_mc_op = 0; struct proc_input *pinput = NULL; enum proc_cn_event ev_type = 0; int err = 0, initial = 0; struct sock *sk = NULL; /* * Events are reported with respect to the initial pid * and user namespaces so ignore requestors from * other namespaces. */ if ((current_user_ns() != &init_user_ns) || !task_is_in_init_pid_ns(current)) return; if (msg->len == sizeof(*pinput)) { pinput = (struct proc_input *)msg->data; mc_op = pinput->mcast_op; ev_type = pinput->event_type; } else if (msg->len == sizeof(mc_op)) { mc_op = *((enum proc_cn_mcast_op *)msg->data); ev_type = PROC_EVENT_ALL; } else { return; } ev_type = valid_event((enum proc_cn_event)ev_type); if (ev_type == PROC_EVENT_NONE) ev_type = PROC_EVENT_ALL; if (nsp->sk) { sk = nsp->sk; if (sk->sk_user_data == NULL) { sk->sk_user_data = kzalloc(sizeof(struct proc_input), GFP_KERNEL); if (sk->sk_user_data == NULL) { err = ENOMEM; goto out; } initial = 1; } else { prev_mc_op = ((struct proc_input *)(sk->sk_user_data))->mcast_op; } ((struct proc_input *)(sk->sk_user_data))->event_type = ev_type; ((struct proc_input *)(sk->sk_user_data))->mcast_op = mc_op; } switch (mc_op) { case PROC_CN_MCAST_LISTEN: if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN)) atomic_inc(&proc_event_num_listeners); break; case PROC_CN_MCAST_IGNORE: if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE)) atomic_dec(&proc_event_num_listeners); ((struct proc_input *)(sk->sk_user_data))->event_type = PROC_EVENT_NONE; break; default: err = EINVAL; break; } out: cn_proc_ack(err, msg->seq, msg->ack); } /* * cn_proc_init - initialization entry point * * Adds the connector callback to the connector driver. */ static int __init cn_proc_init(void) { int err = cn_add_callback(&cn_proc_event_id, "cn_proc", &cn_proc_mcast_ctl); if (err) { pr_warn("cn_proc failed to register\n"); return err; } return 0; } device_initcall(cn_proc_init);
13 13 13 13 13 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 // SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * rtl8712_recv.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved. * * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _RTL8712_RECV_C_ #include <linux/if_ether.h> #include <linux/ip.h> #include <net/cfg80211.h> #include "osdep_service.h" #include "drv_types.h" #include "recv_osdep.h" #include "mlme_osdep.h" #include "ethernet.h" #include "usb_ops.h" #include "wifi.h" static void recv_tasklet(struct tasklet_struct *t); int r8712_init_recv_priv(struct recv_priv *precvpriv, struct _adapter *padapter) { int i; struct recv_buf *precvbuf; addr_t tmpaddr = 0; int alignment = 0; struct sk_buff *pskb = NULL; /*init recv_buf*/ _init_queue(&precvpriv->free_recv_buf_queue); precvpriv->pallocated_recv_buf = kzalloc(NR_RECVBUFF * sizeof(struct recv_buf) + 4, GFP_ATOMIC); if (!precvpriv->pallocated_recv_buf) return -ENOMEM; precvpriv->precv_buf = precvpriv->pallocated_recv_buf + 4 - ((addr_t)(precvpriv->pallocated_recv_buf) & 3); precvbuf = (struct recv_buf *)precvpriv->precv_buf; for (i = 0; i < NR_RECVBUFF; i++) { INIT_LIST_HEAD(&precvbuf->list); spin_lock_init(&precvbuf->recvbuf_lock); if (r8712_os_recvbuf_resource_alloc(padapter, precvbuf)) break; precvbuf->ref_cnt = 0; precvbuf->adapter = padapter; list_add_tail(&precvbuf->list, &precvpriv->free_recv_buf_queue.queue); precvbuf++; } precvpriv->free_recv_buf_queue_cnt = NR_RECVBUFF; tasklet_setup(&precvpriv->recv_tasklet, recv_tasklet); skb_queue_head_init(&precvpriv->rx_skb_queue); skb_queue_head_init(&precvpriv->free_recv_skb_queue); for (i = 0; i < NR_PREALLOC_RECV_SKB; i++) { pskb = netdev_alloc_skb(padapter->pnetdev, MAX_RECVBUF_SZ + RECVBUFF_ALIGN_SZ); if (pskb) { tmpaddr = (addr_t)pskb->data; alignment = tmpaddr & (RECVBUFF_ALIGN_SZ - 1); skb_reserve(pskb, (RECVBUFF_ALIGN_SZ - alignment)); skb_queue_tail(&precvpriv->free_recv_skb_queue, pskb); } pskb = NULL; } return 0; } void r8712_free_recv_priv(struct recv_priv *precvpriv) { int i; struct recv_buf *precvbuf; struct _adapter *padapter = precvpriv->adapter; precvbuf = (struct recv_buf *)precvpriv->precv_buf; for (i = 0; i < NR_RECVBUFF; i++) { r8712_os_recvbuf_resource_free(padapter, precvbuf); precvbuf++; } kfree(precvpriv->pallocated_recv_buf); skb_queue_purge(&precvpriv->rx_skb_queue); if (skb_queue_len(&precvpriv->rx_skb_queue)) netdev_warn(padapter->pnetdev, "r8712u: rx_skb_queue not empty\n"); skb_queue_purge(&precvpriv->free_recv_skb_queue); if (skb_queue_len(&precvpriv->free_recv_skb_queue)) netdev_warn(padapter->pnetdev, "r8712u: free_recv_skb_queue not empty %d\n", skb_queue_len(&precvpriv->free_recv_skb_queue)); } void r8712_init_recvbuf(struct _adapter *padapter, struct recv_buf *precvbuf) { precvbuf->transfer_len = 0; precvbuf->len = 0; precvbuf->ref_cnt = 0; if (precvbuf->pbuf) { precvbuf->pdata = precvbuf->pbuf; precvbuf->phead = precvbuf->pbuf; precvbuf->ptail = precvbuf->pbuf; precvbuf->pend = precvbuf->pdata + MAX_RECVBUF_SZ; } } void r8712_free_recvframe(union recv_frame *precvframe, struct __queue *pfree_recv_queue) { unsigned long irqL; struct _adapter *padapter = precvframe->u.hdr.adapter; struct recv_priv *precvpriv = &padapter->recvpriv; if (precvframe->u.hdr.pkt) { dev_kfree_skb_any(precvframe->u.hdr.pkt);/*free skb by driver*/ precvframe->u.hdr.pkt = NULL; } spin_lock_irqsave(&pfree_recv_queue->lock, irqL); list_del_init(&precvframe->u.hdr.list); list_add_tail(&precvframe->u.hdr.list, &pfree_recv_queue->queue); if (padapter) { if (pfree_recv_queue == &precvpriv->free_recv_queue) precvpriv->free_recvframe_cnt++; } spin_unlock_irqrestore(&pfree_recv_queue->lock, irqL); } static void update_recvframe_attrib_from_recvstat(struct rx_pkt_attrib *pattrib, struct recv_stat *prxstat) { u16 drvinfo_sz; drvinfo_sz = (le32_to_cpu(prxstat->rxdw0) & 0x000f0000) >> 16; drvinfo_sz <<= 3; /*TODO: * Offset 0 */ pattrib->bdecrypted = (le32_to_cpu(prxstat->rxdw0) & BIT(27)) == 0; pattrib->crc_err = (le32_to_cpu(prxstat->rxdw0) & BIT(14)) != 0; /*Offset 4*/ /*Offset 8*/ /*Offset 12*/ if (le32_to_cpu(prxstat->rxdw3) & BIT(13)) { pattrib->tcpchk_valid = 1; /* valid */ if (le32_to_cpu(prxstat->rxdw3) & BIT(11)) pattrib->tcp_chkrpt = 1; /* correct */ else pattrib->tcp_chkrpt = 0; /* incorrect */ if (le32_to_cpu(prxstat->rxdw3) & BIT(12)) pattrib->ip_chkrpt = 1; /* correct */ else pattrib->ip_chkrpt = 0; /* incorrect */ } else { pattrib->tcpchk_valid = 0; /* invalid */ } pattrib->mcs_rate = (u8)((le32_to_cpu(prxstat->rxdw3)) & 0x3f); pattrib->htc = (u8)((le32_to_cpu(prxstat->rxdw3) >> 14) & 0x1); /*Offset 16*/ /*Offset 20*/ /*phy_info*/ } /*perform defrag*/ static union recv_frame *recvframe_defrag(struct _adapter *adapter, struct __queue *defrag_q) { struct list_head *plist, *phead; u8 wlanhdr_offset; u8 curfragnum; struct recv_frame_hdr *pfhdr, *pnfhdr; union recv_frame *prframe, *pnextrframe; struct __queue *pfree_recv_queue; pfree_recv_queue = &adapter->recvpriv.free_recv_queue; phead = &defrag_q->queue; plist = phead->next; prframe = container_of(plist, union recv_frame, u.list); list_del_init(&prframe->u.list); pfhdr = &prframe->u.hdr; curfragnum = 0; if (curfragnum != pfhdr->attrib.frag_num) { /*the first fragment number must be 0 *free the whole queue */ r8712_free_recvframe(prframe, pfree_recv_queue); r8712_free_recvframe_queue(defrag_q, pfree_recv_queue); return NULL; } curfragnum++; plist = &defrag_q->queue; plist = plist->next; while (!end_of_queue_search(phead, plist)) { pnextrframe = container_of(plist, union recv_frame, u.list); pnfhdr = &pnextrframe->u.hdr; /*check the fragment sequence (2nd ~n fragment frame) */ if (curfragnum != pnfhdr->attrib.frag_num) { /* the fragment number must increase (after decache) * release the defrag_q & prframe */ r8712_free_recvframe(prframe, pfree_recv_queue); r8712_free_recvframe_queue(defrag_q, pfree_recv_queue); return NULL; } curfragnum++; /* copy the 2nd~n fragment frame's payload to the first fragment * get the 2nd~last fragment frame's payload */ wlanhdr_offset = pnfhdr->attrib.hdrlen + pnfhdr->attrib.iv_len; recvframe_pull(pnextrframe, wlanhdr_offset); /* append to first fragment frame's tail (if privacy frame, * pull the ICV) */ recvframe_pull_tail(prframe, pfhdr->attrib.icv_len); memcpy(pfhdr->rx_tail, pnfhdr->rx_data, pnfhdr->len); recvframe_put(prframe, pnfhdr->len); pfhdr->attrib.icv_len = pnfhdr->attrib.icv_len; plist = plist->next; } /* free the defrag_q queue and return the prframe */ r8712_free_recvframe_queue(defrag_q, pfree_recv_queue); return prframe; } /* check if need to defrag, if needed queue the frame to defrag_q */ union recv_frame *r8712_recvframe_chk_defrag(struct _adapter *padapter, union recv_frame *precv_frame) { u8 ismfrag; u8 fragnum; u8 *psta_addr; struct recv_frame_hdr *pfhdr; struct sta_info *psta; struct sta_priv *pstapriv; struct list_head *phead; union recv_frame *prtnframe = NULL; struct __queue *pfree_recv_queue, *pdefrag_q; pstapriv = &padapter->stapriv; pfhdr = &precv_frame->u.hdr; pfree_recv_queue = &padapter->recvpriv.free_recv_queue; /* need to define struct of wlan header frame ctrl */ ismfrag = pfhdr->attrib.mfrag; fragnum = pfhdr->attrib.frag_num; psta_addr = pfhdr->attrib.ta; psta = r8712_get_stainfo(pstapriv, psta_addr); if (!psta) pdefrag_q = NULL; else pdefrag_q = &psta->sta_recvpriv.defrag_q; if ((ismfrag == 0) && (fragnum == 0)) prtnframe = precv_frame;/*isn't a fragment frame*/ if (ismfrag == 1) { /* 0~(n-1) fragment frame * enqueue to defraf_g */ if (pdefrag_q) { if (fragnum == 0) { /*the first fragment*/ if (!list_empty(&pdefrag_q->queue)) { /*free current defrag_q */ r8712_free_recvframe_queue(pdefrag_q, pfree_recv_queue); } } /* Then enqueue the 0~(n-1) fragment to the defrag_q */ phead = &pdefrag_q->queue; list_add_tail(&pfhdr->list, phead); prtnframe = NULL; } else { /* can't find this ta's defrag_queue, so free this * recv_frame */ r8712_free_recvframe(precv_frame, pfree_recv_queue); prtnframe = NULL; } } if ((ismfrag == 0) && (fragnum != 0)) { /* the last fragment frame * enqueue the last fragment */ if (pdefrag_q) { phead = &pdefrag_q->queue; list_add_tail(&pfhdr->list, phead); /*call recvframe_defrag to defrag*/ precv_frame = recvframe_defrag(padapter, pdefrag_q); prtnframe = precv_frame; } else { /* can't find this ta's defrag_queue, so free this * recv_frame */ r8712_free_recvframe(precv_frame, pfree_recv_queue); prtnframe = NULL; } } if (prtnframe && (prtnframe->u.hdr.attrib.privacy)) { /* after defrag we must check tkip mic code */ if (r8712_recvframe_chkmic(padapter, prtnframe) == _FAIL) { r8712_free_recvframe(prtnframe, pfree_recv_queue); prtnframe = NULL; } } return prtnframe; } static void amsdu_to_msdu(struct _adapter *padapter, union recv_frame *prframe) { int a_len, padding_len; u16 eth_type, nSubframe_Length; u8 nr_subframes, i; unsigned char *pdata; struct rx_pkt_attrib *pattrib; _pkt *sub_skb, *subframes[MAX_SUBFRAME_COUNT]; struct recv_priv *precvpriv = &padapter->recvpriv; struct __queue *pfree_recv_queue = &precvpriv->free_recv_queue; nr_subframes = 0; pattrib = &prframe->u.hdr.attrib; recvframe_pull(prframe, prframe->u.hdr.attrib.hdrlen); if (prframe->u.hdr.attrib.iv_len > 0) recvframe_pull(prframe, prframe->u.hdr.attrib.iv_len); a_len = prframe->u.hdr.len; pdata = prframe->u.hdr.rx_data; while (a_len > ETH_HLEN) { /* Offset 12 denote 2 mac address */ nSubframe_Length = *((u16 *)(pdata + 12)); /*==m==>change the length order*/ nSubframe_Length = (nSubframe_Length >> 8) + (nSubframe_Length << 8); if (a_len < (ETHERNET_HEADER_SIZE + nSubframe_Length)) { netdev_warn(padapter->pnetdev, "r8712u: nRemain_Length is %d and nSubframe_Length is: %d\n", a_len, nSubframe_Length); goto exit; } /* move the data point to data content */ pdata += ETH_HLEN; a_len -= ETH_HLEN; /* Allocate new skb for releasing to upper layer */ sub_skb = dev_alloc_skb(nSubframe_Length + 12); if (!sub_skb) break; skb_reserve(sub_skb, 12); skb_put_data(sub_skb, pdata, nSubframe_Length); subframes[nr_subframes++] = sub_skb; if (nr_subframes >= MAX_SUBFRAME_COUNT) { netdev_warn(padapter->pnetdev, "r8712u: ParseSubframe(): Too many Subframes! Packets dropped!\n"); break; } pdata += nSubframe_Length; a_len -= nSubframe_Length; if (a_len != 0) { padding_len = 4 - ((nSubframe_Length + ETH_HLEN) & 3); if (padding_len == 4) padding_len = 0; if (a_len < padding_len) goto exit; pdata += padding_len; a_len -= padding_len; } } for (i = 0; i < nr_subframes; i++) { sub_skb = subframes[i]; /* convert hdr + possible LLC headers into Ethernet header */ eth_type = (sub_skb->data[6] << 8) | sub_skb->data[7]; if (sub_skb->len >= 8 && ((!memcmp(sub_skb->data, rfc1042_header, SNAP_SIZE) && eth_type != ETH_P_AARP && eth_type != ETH_P_IPX) || !memcmp(sub_skb->data, bridge_tunnel_header, SNAP_SIZE))) { /* remove RFC1042 or Bridge-Tunnel encapsulation and * replace EtherType */ skb_pull(sub_skb, SNAP_SIZE); memcpy(skb_push(sub_skb, ETH_ALEN), pattrib->src, ETH_ALEN); memcpy(skb_push(sub_skb, ETH_ALEN), pattrib->dst, ETH_ALEN); } else { __be16 len; /* Leave Ethernet header part of hdr and full payload */ len = htons(sub_skb->len); memcpy(skb_push(sub_skb, 2), &len, 2); memcpy(skb_push(sub_skb, ETH_ALEN), pattrib->src, ETH_ALEN); memcpy(skb_push(sub_skb, ETH_ALEN), pattrib->dst, ETH_ALEN); } /* Indicate the packets to upper layer */ if (sub_skb) { sub_skb->protocol = eth_type_trans(sub_skb, padapter->pnetdev); sub_skb->dev = padapter->pnetdev; if ((pattrib->tcpchk_valid == 1) && (pattrib->tcp_chkrpt == 1)) { sub_skb->ip_summed = CHECKSUM_UNNECESSARY; } else { sub_skb->ip_summed = CHECKSUM_NONE; } netif_rx(sub_skb); } } exit: prframe->u.hdr.len = 0; r8712_free_recvframe(prframe, pfree_recv_queue); } void r8712_rxcmd_event_hdl(struct _adapter *padapter, void *prxcmdbuf) { __le32 voffset; u8 *poffset; u16 cmd_len, drvinfo_sz; struct recv_stat *prxstat; poffset = prxcmdbuf; voffset = *(__le32 *)poffset; prxstat = prxcmdbuf; drvinfo_sz = (le32_to_cpu(prxstat->rxdw0) & 0x000f0000) >> 16; drvinfo_sz <<= 3; poffset += RXDESC_SIZE + drvinfo_sz; do { voffset = *(__le32 *)poffset; cmd_len = (u16)(le32_to_cpu(voffset) & 0xffff); r8712_event_handle(padapter, (__le32 *)poffset); poffset += (cmd_len + 8);/*8 bytes alignment*/ } while (le32_to_cpu(voffset) & BIT(31)); } static int check_indicate_seq(struct recv_reorder_ctrl *preorder_ctrl, u16 seq_num) { u8 wsize = preorder_ctrl->wsize_b; u16 wend = (preorder_ctrl->indicate_seq + wsize - 1) % 4096; /* Rx Reorder initialize condition.*/ if (preorder_ctrl->indicate_seq == 0xffff) preorder_ctrl->indicate_seq = seq_num; /* Drop out the packet which SeqNum is smaller than WinStart */ if (SN_LESS(seq_num, preorder_ctrl->indicate_seq)) return false; /* * Sliding window manipulation. Conditions includes: * 1. Incoming SeqNum is equal to WinStart =>Window shift 1 * 2. Incoming SeqNum is larger than the WinEnd => Window shift N */ if (SN_EQUAL(seq_num, preorder_ctrl->indicate_seq)) preorder_ctrl->indicate_seq = (preorder_ctrl->indicate_seq + 1) % 4096; else if (SN_LESS(wend, seq_num)) { if (seq_num >= (wsize - 1)) preorder_ctrl->indicate_seq = seq_num + 1 - wsize; else preorder_ctrl->indicate_seq = 4095 - (wsize - (seq_num + 1)) + 1; } return true; } static int enqueue_reorder_recvframe(struct recv_reorder_ctrl *preorder_ctrl, union recv_frame *prframe) { struct list_head *phead, *plist; union recv_frame *pnextrframe; struct rx_pkt_attrib *pnextattrib; struct __queue *ppending_recvframe_queue = &preorder_ctrl->pending_recvframe_queue; struct rx_pkt_attrib *pattrib = &prframe->u.hdr.attrib; phead = &ppending_recvframe_queue->queue; plist = phead->next; while (!end_of_queue_search(phead, plist)) { pnextrframe = container_of(plist, union recv_frame, u.list); pnextattrib = &pnextrframe->u.hdr.attrib; if (SN_EQUAL(pnextattrib->seq_num, pattrib->seq_num)) return false; if (SN_LESS(pnextattrib->seq_num, pattrib->seq_num)) plist = plist->next; else break; } list_del_init(&prframe->u.hdr.list); list_add_tail(&prframe->u.hdr.list, plist); return true; } int r8712_recv_indicatepkts_in_order(struct _adapter *padapter, struct recv_reorder_ctrl *preorder_ctrl, int bforced) { struct list_head *phead, *plist; union recv_frame *prframe; struct rx_pkt_attrib *pattrib; int bPktInBuf = false; struct __queue *ppending_recvframe_queue = &preorder_ctrl->pending_recvframe_queue; phead = &ppending_recvframe_queue->queue; plist = phead->next; /* Handling some condition for forced indicate case.*/ if (bforced) { if (list_empty(phead)) return true; prframe = container_of(plist, union recv_frame, u.list); pattrib = &prframe->u.hdr.attrib; preorder_ctrl->indicate_seq = pattrib->seq_num; } /* Prepare indication list and indication. * Check if there is any packet need indicate. */ while (!list_empty(phead)) { prframe = container_of(plist, union recv_frame, u.list); pattrib = &prframe->u.hdr.attrib; if (!SN_LESS(preorder_ctrl->indicate_seq, pattrib->seq_num)) { plist = plist->next; list_del_init(&prframe->u.hdr.list); if (SN_EQUAL(preorder_ctrl->indicate_seq, pattrib->seq_num)) preorder_ctrl->indicate_seq = (preorder_ctrl->indicate_seq + 1) % 4096; /*indicate this recv_frame*/ if (!pattrib->amsdu) { if (!padapter->driver_stopped && !padapter->surprise_removed) { /* indicate this recv_frame */ r8712_recv_indicatepkt(padapter, prframe); } } else if (pattrib->amsdu == 1) { amsdu_to_msdu(padapter, prframe); } /* Update local variables. */ bPktInBuf = false; } else { bPktInBuf = true; break; } } return bPktInBuf; } static int recv_indicatepkt_reorder(struct _adapter *padapter, union recv_frame *prframe) { unsigned long irql; struct rx_pkt_attrib *pattrib = &prframe->u.hdr.attrib; struct recv_reorder_ctrl *preorder_ctrl = prframe->u.hdr.preorder_ctrl; struct __queue *ppending_recvframe_queue = &preorder_ctrl->pending_recvframe_queue; if (!pattrib->amsdu) { /* s1. */ r8712_wlanhdr_to_ethhdr(prframe); if (pattrib->qos != 1) { if (!padapter->driver_stopped && !padapter->surprise_removed) { r8712_recv_indicatepkt(padapter, prframe); return 0; } else { return -EINVAL; } } } spin_lock_irqsave(&ppending_recvframe_queue->lock, irql); /*s2. check if winstart_b(indicate_seq) needs to be updated*/ if (!check_indicate_seq(preorder_ctrl, pattrib->seq_num)) goto _err_exit; /*s3. Insert all packet into Reorder Queue to maintain its ordering.*/ if (!enqueue_reorder_recvframe(preorder_ctrl, prframe)) goto _err_exit; /*s4. * Indication process. * After Packet dropping and Sliding Window shifting as above, we can * now just indicate the packets with the SeqNum smaller than latest * WinStart and buffer other packets. * * For Rx Reorder condition: * 1. All packets with SeqNum smaller than WinStart => Indicate * 2. All packets with SeqNum larger than or equal to * WinStart => Buffer it. */ if (r8712_recv_indicatepkts_in_order(padapter, preorder_ctrl, false)) { mod_timer(&preorder_ctrl->reordering_ctrl_timer, jiffies + msecs_to_jiffies(REORDER_WAIT_TIME)); spin_unlock_irqrestore(&ppending_recvframe_queue->lock, irql); } else { spin_unlock_irqrestore(&ppending_recvframe_queue->lock, irql); del_timer(&preorder_ctrl->reordering_ctrl_timer); } return 0; _err_exit: spin_unlock_irqrestore(&ppending_recvframe_queue->lock, irql); return -ENOMEM; } void r8712_reordering_ctrl_timeout_handler(void *pcontext) { unsigned long irql; struct recv_reorder_ctrl *preorder_ctrl = pcontext; struct _adapter *padapter = preorder_ctrl->padapter; struct __queue *ppending_recvframe_queue = &preorder_ctrl->pending_recvframe_queue; if (padapter->driver_stopped || padapter->surprise_removed) return; spin_lock_irqsave(&ppending_recvframe_queue->lock, irql); r8712_recv_indicatepkts_in_order(padapter, preorder_ctrl, true); spin_unlock_irqrestore(&ppending_recvframe_queue->lock, irql); } static int r8712_process_recv_indicatepkts(struct _adapter *padapter, union recv_frame *prframe) { int retval = _SUCCESS; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct ht_priv *phtpriv = &pmlmepriv->htpriv; if (phtpriv->ht_option == 1) { /*B/G/N Mode*/ if (recv_indicatepkt_reorder(padapter, prframe)) { /* including perform A-MPDU Rx Ordering Buffer Control*/ if (!padapter->driver_stopped && !padapter->surprise_removed) return _FAIL; } } else { /*B/G mode*/ retval = r8712_wlanhdr_to_ethhdr(prframe); if (retval) return _FAIL; if (!padapter->driver_stopped && !padapter->surprise_removed) { /* indicate this recv_frame */ r8712_recv_indicatepkt(padapter, prframe); } else { return _FAIL; } } return retval; } static u8 query_rx_pwr_percentage(s8 antpower) { if ((antpower <= -100) || (antpower >= 20)) return 0; else if (antpower >= 0) return 100; else return 100 + antpower; } static u8 evm_db2percentage(s8 value) { /* * -33dB~0dB to 0%~99% */ s8 ret_val = clamp(-value, 0, 33) * 3; if (ret_val == 99) ret_val = 100; return ret_val; } s32 r8712_signal_scale_mapping(s32 cur_sig) { s32 ret_sig; if (cur_sig >= 51 && cur_sig <= 100) ret_sig = 100; else if (cur_sig >= 41 && cur_sig <= 50) ret_sig = 80 + ((cur_sig - 40) * 2); else if (cur_sig >= 31 && cur_sig <= 40) ret_sig = 66 + (cur_sig - 30); else if (cur_sig >= 21 && cur_sig <= 30) ret_sig = 54 + (cur_sig - 20); else if (cur_sig >= 10 && cur_sig <= 20) ret_sig = 42 + (((cur_sig - 10) * 2) / 3); else if (cur_sig >= 5 && cur_sig <= 9) ret_sig = 22 + (((cur_sig - 5) * 3) / 2); else if (cur_sig >= 1 && cur_sig <= 4) ret_sig = 6 + (((cur_sig - 1) * 3) / 2); else ret_sig = cur_sig; return ret_sig; } static s32 translate2dbm(struct _adapter *padapter, u8 signal_strength_idx) { s32 signal_power; /* in dBm.*/ /* Translate to dBm (x=0.5y-95).*/ signal_power = (s32)((signal_strength_idx + 1) >> 1); signal_power -= 95; return signal_power; } static void query_rx_phy_status(struct _adapter *padapter, union recv_frame *prframe) { u8 i, max_spatial_stream, evm; struct recv_stat *prxstat = (struct recv_stat *)prframe->u.hdr.rx_head; struct phy_stat *pphy_stat = (struct phy_stat *)(prxstat + 1); u8 *pphy_head = (u8 *)(prxstat + 1); s8 rx_pwr[4], rx_pwr_all; u8 pwdb_all; u32 rssi, total_rssi = 0; u8 bcck_rate = 0, rf_rx_num = 0, cck_highpwr = 0; struct phy_cck_rx_status *pcck_buf; u8 sq; /* Record it for next packet processing*/ bcck_rate = (prframe->u.hdr.attrib.mcs_rate <= 3 ? 1 : 0); if (bcck_rate) { u8 report; /* CCK Driver info Structure is not the same as OFDM packet.*/ pcck_buf = (struct phy_cck_rx_status *)pphy_stat; /* (1)Hardware does not provide RSSI for CCK * (2)PWDB, Average PWDB calculated by hardware * (for rate adaptive) */ if (!cck_highpwr) { report = pcck_buf->cck_agc_rpt & 0xc0; report >>= 6; switch (report) { /* Modify the RF RNA gain value to -40, -20, * -2, 14 by Jenyu's suggestion * Note: different RF with the different * RNA gain. */ case 0x3: rx_pwr_all = -40 - (pcck_buf->cck_agc_rpt & 0x3e); break; case 0x2: rx_pwr_all = -20 - (pcck_buf->cck_agc_rpt & 0x3e); break; case 0x1: rx_pwr_all = -2 - (pcck_buf->cck_agc_rpt & 0x3e); break; case 0x0: rx_pwr_all = 14 - (pcck_buf->cck_agc_rpt & 0x3e); break; } } else { report = ((u8)(le32_to_cpu(pphy_stat->phydw1) >> 8)) & 0x60; report >>= 5; switch (report) { case 0x3: rx_pwr_all = -40 - ((pcck_buf->cck_agc_rpt & 0x1f) << 1); break; case 0x2: rx_pwr_all = -20 - ((pcck_buf->cck_agc_rpt & 0x1f) << 1); break; case 0x1: rx_pwr_all = -2 - ((pcck_buf->cck_agc_rpt & 0x1f) << 1); break; case 0x0: rx_pwr_all = 14 - ((pcck_buf->cck_agc_rpt & 0x1f) << 1); break; } } pwdb_all = query_rx_pwr_percentage(rx_pwr_all); /* CCK gain is smaller than OFDM/MCS gain,*/ /* so we add gain diff by experiences, the val is 6 */ pwdb_all += 6; if (pwdb_all > 100) pwdb_all = 100; /* modify the offset to make the same gain index with OFDM.*/ if (pwdb_all > 34 && pwdb_all <= 42) pwdb_all -= 2; else if (pwdb_all > 26 && pwdb_all <= 34) pwdb_all -= 6; else if (pwdb_all > 14 && pwdb_all <= 26) pwdb_all -= 8; else if (pwdb_all > 4 && pwdb_all <= 14) pwdb_all -= 4; /* * (3) Get Signal Quality (EVM) */ if (pwdb_all > 40) { sq = 100; } else { sq = pcck_buf->sq_rpt; if (pcck_buf->sq_rpt > 64) sq = 0; else if (pcck_buf->sq_rpt < 20) sq = 100; else sq = ((64 - sq) * 100) / 44; } prframe->u.hdr.attrib.signal_qual = sq; prframe->u.hdr.attrib.rx_mimo_signal_qual[0] = sq; prframe->u.hdr.attrib.rx_mimo_signal_qual[1] = -1; } else { /* (1)Get RSSI for HT rate */ for (i = 0; i < ((padapter->registrypriv.rf_config) & 0x0f); i++) { rf_rx_num++; rx_pwr[i] = ((pphy_head[PHY_STAT_GAIN_TRSW_SHT + i] & 0x3F) * 2) - 110; /* Translate DBM to percentage. */ rssi = query_rx_pwr_percentage(rx_pwr[i]); total_rssi += rssi; } /* (2)PWDB, Average PWDB calculated by hardware (for * rate adaptive) */ rx_pwr_all = (((pphy_head[PHY_STAT_PWDB_ALL_SHT]) >> 1) & 0x7f) - 106; pwdb_all = query_rx_pwr_percentage(rx_pwr_all); { /* (3)EVM of HT rate */ if (prframe->u.hdr.attrib.htc && prframe->u.hdr.attrib.mcs_rate >= 20 && prframe->u.hdr.attrib.mcs_rate <= 27) { /* both spatial stream make sense */ max_spatial_stream = 2; } else { /* only spatial stream 1 makes sense */ max_spatial_stream = 1; } for (i = 0; i < max_spatial_stream; i++) { evm = evm_db2percentage((pphy_head [PHY_STAT_RXEVM_SHT + i]));/*dbm*/ prframe->u.hdr.attrib.signal_qual = (u8)(evm & 0xff); prframe->u.hdr.attrib.rx_mimo_signal_qual[i] = (u8)(evm & 0xff); } } } /* UI BSS List signal strength(in percentage), make it good looking, * from 0~100. It is assigned to the BSS List in * GetValueFromBeaconOrProbeRsp(). */ if (bcck_rate) { prframe->u.hdr.attrib.signal_strength = (u8)r8712_signal_scale_mapping(pwdb_all); } else { if (rf_rx_num != 0) prframe->u.hdr.attrib.signal_strength = (u8)(r8712_signal_scale_mapping(total_rssi /= rf_rx_num)); } } static void process_link_qual(struct _adapter *padapter, union recv_frame *prframe) { u32 last_evm = 0, avg_val; struct rx_pkt_attrib *pattrib; struct smooth_rssi_data *sqd = &padapter->recvpriv.signal_qual_data; if (!prframe || !padapter) return; pattrib = &prframe->u.hdr.attrib; if (pattrib->signal_qual != 0) { /* * 1. Record the general EVM to the sliding window. */ if (sqd->total_num++ >= PHY_LINKQUALITY_SLID_WIN_MAX) { sqd->total_num = PHY_LINKQUALITY_SLID_WIN_MAX; last_evm = sqd->elements[sqd->index]; sqd->total_val -= last_evm; } sqd->total_val += pattrib->signal_qual; sqd->elements[sqd->index++] = pattrib->signal_qual; if (sqd->index >= PHY_LINKQUALITY_SLID_WIN_MAX) sqd->index = 0; /* <1> Showed on UI for user, in percentage. */ avg_val = sqd->total_val / sqd->total_num; padapter->recvpriv.signal = (u8)avg_val; } } static void process_rssi(struct _adapter *padapter, union recv_frame *prframe) { u32 last_rssi, tmp_val; struct rx_pkt_attrib *pattrib = &prframe->u.hdr.attrib; struct smooth_rssi_data *ssd = &padapter->recvpriv.signal_strength_data; if (ssd->total_num++ >= PHY_RSSI_SLID_WIN_MAX) { ssd->total_num = PHY_RSSI_SLID_WIN_MAX; last_rssi = ssd->elements[ssd->index]; ssd->total_val -= last_rssi; } ssd->total_val += pattrib->signal_strength; ssd->elements[ssd->index++] = pattrib->signal_strength; if (ssd->index >= PHY_RSSI_SLID_WIN_MAX) ssd->index = 0; tmp_val = ssd->total_val / ssd->total_num; padapter->recvpriv.rssi = (s8)translate2dbm(padapter, (u8)tmp_val); } static void process_phy_info(struct _adapter *padapter, union recv_frame *prframe) { query_rx_phy_status(padapter, prframe); process_rssi(padapter, prframe); process_link_qual(padapter, prframe); } int recv_func(struct _adapter *padapter, void *pcontext) { struct rx_pkt_attrib *pattrib; union recv_frame *prframe, *orig_prframe; int retval = _SUCCESS; struct __queue *pfree_recv_queue = &padapter->recvpriv.free_recv_queue; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; prframe = pcontext; orig_prframe = prframe; pattrib = &prframe->u.hdr.attrib; if (check_fwstate(pmlmepriv, WIFI_MP_STATE)) { if (pattrib->crc_err == 1) padapter->mppriv.rx_crcerrpktcount++; else padapter->mppriv.rx_pktcount++; if (!check_fwstate(pmlmepriv, WIFI_MP_LPBK_STATE)) { /* free this recv_frame */ r8712_free_recvframe(orig_prframe, pfree_recv_queue); goto _exit_recv_func; } } /* check the frame crtl field and decache */ retval = r8712_validate_recv_frame(padapter, prframe); if (retval != _SUCCESS) { /* free this recv_frame */ r8712_free_recvframe(orig_prframe, pfree_recv_queue); goto _exit_recv_func; } process_phy_info(padapter, prframe); prframe = r8712_decryptor(padapter, prframe); if (!prframe) { retval = _FAIL; goto _exit_recv_func; } prframe = r8712_recvframe_chk_defrag(padapter, prframe); if (!prframe) goto _exit_recv_func; prframe = r8712_portctrl(padapter, prframe); if (!prframe) { retval = _FAIL; goto _exit_recv_func; } retval = r8712_process_recv_indicatepkts(padapter, prframe); if (retval != _SUCCESS) { r8712_free_recvframe(orig_prframe, pfree_recv_queue); goto _exit_recv_func; } _exit_recv_func: return retval; } static void recvbuf2recvframe(struct _adapter *padapter, struct sk_buff *pskb) { u8 *pbuf, shift_sz = 0; u8 frag, mf; uint pkt_len; u32 transfer_len; struct recv_stat *prxstat; u16 pkt_cnt, drvinfo_sz, pkt_offset, tmp_len, alloc_sz; struct __queue *pfree_recv_queue; _pkt *pkt_copy = NULL; union recv_frame *precvframe = NULL; struct recv_priv *precvpriv = &padapter->recvpriv; pfree_recv_queue = &precvpriv->free_recv_queue; pbuf = pskb->data; prxstat = (struct recv_stat *)pbuf; pkt_cnt = (le32_to_cpu(prxstat->rxdw2) >> 16) & 0xff; pkt_len = le32_to_cpu(prxstat->rxdw0) & 0x00003fff; transfer_len = pskb->len; /* Test throughput with Netgear 3700 (No security) with Chariot 3T3R * pairs. The packet count will be a big number so that the containing * packet will effect the Rx reordering. */ if (transfer_len < pkt_len) { /* In this case, it means the MAX_RECVBUF_SZ is too small to * get the data from 8712u. */ return; } do { prxstat = (struct recv_stat *)pbuf; pkt_len = le32_to_cpu(prxstat->rxdw0) & 0x00003fff; /* more fragment bit */ mf = (le32_to_cpu(prxstat->rxdw1) >> 27) & 0x1; /* ragmentation number */ frag = (le32_to_cpu(prxstat->rxdw2) >> 12) & 0xf; /* uint 2^3 = 8 bytes */ drvinfo_sz = (le32_to_cpu(prxstat->rxdw0) & 0x000f0000) >> 16; drvinfo_sz <<= 3; if (pkt_len <= 0) return; /* Qos data, wireless lan header length is 26 */ if ((le32_to_cpu(prxstat->rxdw0) >> 23) & 0x01) shift_sz = 2; precvframe = r8712_alloc_recvframe(pfree_recv_queue); if (!precvframe) return; INIT_LIST_HEAD(&precvframe->u.hdr.list); precvframe->u.hdr.precvbuf = NULL; /*can't access the precvbuf*/ precvframe->u.hdr.len = 0; tmp_len = pkt_len + drvinfo_sz + RXDESC_SIZE; pkt_offset = (u16)round_up(tmp_len, 128); /* for first fragment packet, driver need allocate 1536 + * drvinfo_sz + RXDESC_SIZE to defrag packet. */ if ((mf == 1) && (frag == 0)) /*1658+6=1664, 1664 is 128 alignment.*/ alloc_sz = max_t(u16, tmp_len, 1658); else alloc_sz = tmp_len; /* 2 is for IP header 4 bytes alignment in QoS packet case. * 4 is for skb->data 4 bytes alignment. */ alloc_sz += 6; pkt_copy = netdev_alloc_skb(padapter->pnetdev, alloc_sz); if (!pkt_copy) return; precvframe->u.hdr.pkt = pkt_copy; skb_reserve(pkt_copy, 4 - ((addr_t)(pkt_copy->data) % 4)); skb_reserve(pkt_copy, shift_sz); memcpy(pkt_copy->data, pbuf, tmp_len); precvframe->u.hdr.rx_head = pkt_copy->data; precvframe->u.hdr.rx_data = pkt_copy->data; precvframe->u.hdr.rx_tail = pkt_copy->data; precvframe->u.hdr.rx_end = pkt_copy->data + alloc_sz; recvframe_put(precvframe, tmp_len); recvframe_pull(precvframe, drvinfo_sz + RXDESC_SIZE); /* because the endian issue, driver avoid reference to the * rxstat after calling update_recvframe_attrib_from_recvstat(); */ update_recvframe_attrib_from_recvstat(&precvframe->u.hdr.attrib, prxstat); r8712_recv_entry(precvframe); transfer_len -= pkt_offset; pbuf += pkt_offset; pkt_cnt--; precvframe = NULL; pkt_copy = NULL; } while ((transfer_len > 0) && pkt_cnt > 0); } static void recv_tasklet(struct tasklet_struct *t) { struct sk_buff *pskb; struct _adapter *padapter = from_tasklet(padapter, t, recvpriv.recv_tasklet); struct recv_priv *precvpriv = &padapter->recvpriv; while (NULL != (pskb = skb_dequeue(&precvpriv->rx_skb_queue))) { recvbuf2recvframe(padapter, pskb); skb_reset_tail_pointer(pskb); pskb->len = 0; if (!skb_cloned(pskb)) skb_queue_tail(&precvpriv->free_recv_skb_queue, pskb); else consume_skb(pskb); } }
7 2 4 4 1 23 22 4 22 3 1 2 2 28 9 2 2 2 2 23 2 2 25 24 4 13 13 9 39 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 // SPDX-License-Identifier: GPL-2.0 /* XDP user-space packet buffer * Copyright(c) 2018 Intel Corporation. */ #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/bpf.h> #include <linux/mm.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> #include <linux/vmalloc.h> #include "xdp_umem.h" #include "xsk_queue.h" static DEFINE_IDA(umem_ida); static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); kvfree(umem->pgs); umem->pgs = NULL; } static void xdp_umem_unaccount_pages(struct xdp_umem *umem) { if (umem->user) { atomic_long_sub(umem->npgs, &umem->user->locked_vm); free_uid(umem->user); } } static void xdp_umem_addr_unmap(struct xdp_umem *umem) { vunmap(umem->addrs); umem->addrs = NULL; } static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, u32 nr_pages) { umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); if (!umem->addrs) return -ENOMEM; return 0; } static void xdp_umem_release(struct xdp_umem *umem) { umem->zc = false; ida_free(&umem_ida, umem->id); xdp_umem_addr_unmap(umem); xdp_umem_unpin_pages(umem); xdp_umem_unaccount_pages(umem); kfree(umem); } static void xdp_umem_release_deferred(struct work_struct *work) { struct xdp_umem *umem = container_of(work, struct xdp_umem, work); xdp_umem_release(umem); } void xdp_get_umem(struct xdp_umem *umem) { refcount_inc(&umem->users); } void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) { if (!umem) return; if (refcount_dec_and_test(&umem->users)) { if (defer_cleanup) { INIT_WORK(&umem->work, xdp_umem_release_deferred); schedule_work(&umem->work); } else { xdp_umem_release(umem); } } } static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) { unsigned int gup_flags = FOLL_WRITE; long npgs; int err; umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN); if (!umem->pgs) return -ENOMEM; mmap_read_lock(current->mm); npgs = pin_user_pages(address, umem->npgs, gup_flags | FOLL_LONGTERM, &umem->pgs[0]); mmap_read_unlock(current->mm); if (npgs != umem->npgs) { if (npgs >= 0) { umem->npgs = npgs; err = -ENOMEM; goto out_pin; } err = npgs; goto out_pgs; } return 0; out_pin: xdp_umem_unpin_pages(umem); out_pgs: kvfree(umem->pgs); umem->pgs = NULL; return err; } static int xdp_umem_account_pages(struct xdp_umem *umem) { unsigned long lock_limit, new_npgs, old_npgs; if (capable(CAP_IPC_LOCK)) return 0; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; umem->user = get_uid(current_user()); do { old_npgs = atomic_long_read(&umem->user->locked_vm); new_npgs = old_npgs + umem->npgs; if (new_npgs > lock_limit) { free_uid(umem->user); umem->user = NULL; return -ENOBUFS; } } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, new_npgs) != old_npgs); return 0; } #define XDP_UMEM_FLAGS_VALID ( \ XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ XDP_UMEM_TX_SW_CSUM | \ XDP_UMEM_TX_METADATA_LEN | \ 0) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; u32 chunk_size = mr->chunk_size, headroom = mr->headroom; u64 addr = mr->addr, size = mr->len; u32 chunks_rem, npgs_rem; u64 chunks, npgs; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { /* Strictly speaking we could support this, if: * - huge pages, or* * - using an IOMMU, or * - making sure the memory area is consecutive * but for now, we simply say "computer says no". */ return -EINVAL; } if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) return -EINVAL; if (!PAGE_ALIGNED(addr)) { /* Memory area has to be page size aligned. For * simplicity, this might change. */ return -EINVAL; } if ((addr + size) < addr) return -EINVAL; npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); if (npgs_rem) npgs++; if (npgs > U32_MAX) return -EINVAL; chunks = div_u64_rem(size, chunk_size, &chunks_rem); if (!chunks || chunks > U32_MAX) return -EINVAL; if (!unaligned_chunks && chunks_rem) return -EINVAL; if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; if (mr->flags & XDP_UMEM_TX_METADATA_LEN) { if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) return -EINVAL; umem->tx_metadata_len = mr->tx_metadata_len; } umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; umem->chunks = chunks; umem->npgs = npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); err = xdp_umem_account_pages(umem); if (err) return err; err = xdp_umem_pin_pages(umem, (unsigned long)addr); if (err) goto out_account; err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); if (err) goto out_unpin; return 0; out_unpin: xdp_umem_unpin_pages(umem); out_account: xdp_umem_unaccount_pages(umem); return err; } struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) { struct xdp_umem *umem; int err; umem = kzalloc(sizeof(*umem), GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); err = ida_alloc(&umem_ida, GFP_KERNEL); if (err < 0) { kfree(umem); return ERR_PTR(err); } umem->id = err; err = xdp_umem_reg(umem, mr); if (err) { ida_free(&umem_ida, umem->id); kfree(umem); return ERR_PTR(err); } return umem; }
453 78 14 131 72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/mm.h> struct ctl_table_header; struct mempolicy; /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * * parent/subdir are used for the directory structure (every /proc file has a * parent, but "subdir" is empty for all non-directory entries). * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { /* * number of callers into module in progress; * negative -> it's going away RSN */ atomic_t in_use; refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; struct proc_dir_entry *parent; struct rb_root subdir; struct rb_node subdir_node; char *name; umode_t mode; u8 flags; u8 namelen; char inline_name[]; } __randomize_layout; #define SIZEOF_PDE ( \ sizeof(struct proc_dir_entry) < 128 ? 128 : \ sizeof(struct proc_dir_entry) < 192 ? 192 : \ sizeof(struct proc_dir_entry) < 256 ? 256 : \ sizeof(struct proc_dir_entry) < 512 ? 512 : \ 0) #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry)) static inline bool pde_is_permanent(const struct proc_dir_entry *pde) { return pde->flags & PROC_ENTRY_PERMANENT; } static inline void pde_make_permanent(struct proc_dir_entry *pde) { pde->flags |= PROC_ENTRY_PERMANENT; } extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); int lsmid; }; struct proc_inode { struct pid *pid; unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; } __randomize_layout; /* * General functions */ static inline struct proc_inode *PROC_I(const struct inode *inode) { return container_of(inode, struct proc_inode, vfs_inode); } static inline struct proc_dir_entry *PDE(const struct inode *inode) { return PROC_I(inode)->pde; } static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; } static inline struct task_struct *get_proc_task(const struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ #define FIRST_PROCESS_ENTRY 256 /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 /** * folio_precise_page_mapcount() - Number of mappings of this folio page. * @folio: The folio. * @page: The page. * * The number of present user page table entries that reference this page * as tracked via the RMAP: either referenced directly (PTE) or as part of * a larger area that covers this page (e.g., PMD). * * Use this function only for the calculation of existing statistics * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount). * * Do not add new users. * * Returns: The number of mappings of this folio page. 0 for * folios that are not mapped to user space or are not tracked via the RMAP * (e.g., shared zeropage). */ static inline int folio_precise_page_mapcount(struct folio *folio, struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; /* Handle page_has_type() pages */ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) mapcount = 0; if (folio_test_large(folio)) mapcount += folio_entire_mapcount(folio); return mapcount; } /* * array.c */ extern const struct file_operations proc_tid_children_operations; extern void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_status(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); /* * base.c */ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data); struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline void pde_get(struct proc_dir_entry *pde) { refcount_inc(&pde->refcnt); } extern void pde_put(struct proc_dir_entry *); static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c */ struct pde_opener { struct list_head lh; struct file *file; bool closing; struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); /* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; /* * proc_net.c */ extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; #ifdef CONFIG_NET extern int proc_net_init(void); #else static inline int proc_net_init(void) { return 0; } #endif /* * proc_self.c */ extern int proc_setup_self(struct super_block *); /* * proc_thread_self.c */ extern int proc_setup_thread_self(struct super_block *); extern void proc_thread_self_init(void); /* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } static inline void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { } #endif /* * proc_tty.c */ #ifdef CONFIG_TTY extern void proc_tty_init(void); #else static inline void proc_tty_init(void) {} #endif /* * root.c */ extern struct proc_dir_entry proc_root; extern void proc_self_init(void); /* * task_[no]mmu.c */ struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; struct vma_iterator iter; #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif } __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ pde->proc_dops = &proc_net_dentry_ops; } /* * Add a new procfs dentry that can't serve as a mountpoint. That should * encompass anything that is ephemeral and can just disappear while the * process is still around. */ static inline struct dentry *proc_splice_unmountable(struct inode *inode, struct dentry *dentry, const struct dentry_operations *d_ops) { d_set_d_op(dentry, d_ops); dont_mount(dentry); return d_splice_alias(inode, dentry); }
71 2 72 2 26 27 26 10 10 3 5 6 5 59 35 12 21 71 72 8 85 93 92 93 91 93 93 93 93 19 74 2 72 71 91 53 50 27 4 10 55 54 53 14 30 56 53 50 37 51 27 25 6 6 6 24 28 23 87 78 14 7 28 24 23 70 3 106 83 4 105 4 104 94 97 2 27 82 27 25 82 84 84 82 78 28 29 2 24 24 24 2 1 14 12 2 11 1 14 7 7 18 18 18 12 6 12 12 18 6 12 61 12 12 12 2 10 9 4 2 4 4 4 4 3 2 2 2 13 53 53 77 77 53 13 9 2 61 12 2 70 13 59 60 2 2 15 1 4 6 6 9 9 2 2 1 3 2 2 82 82 47 70 13 13 12 65 1 62 1 11 1 54 41 14 49 13 41 14 54 55 2 5 2 1 1 20 19 19 19 18 3 25 17 2 19 35 36 27 27 3 17 2 19 2 17 19 19 19 18 19 19 57 53 19 54 20 57 57 55 40 40 40 22 40 39 40 95 96 10 4 6 82 70 70 12 45 9 6 14 2 4 7 1 1 1 1 1 1 1 13 1 5 1 5 1 7 1 2 2 282 281 281 282 282 37 54 54 13 41 36 8 7 16 7 5 2 20 7 17 7 60 62 61 57 7 56 56 56 55 14 42 4 39 37 52 37 27 11 11 36 36 4 5 31 9 25 19 20 5 16 16 1 16 9 14 11 24 16 8 2 23 10 10 10 10 2 8 7 2 25 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 /* * videobuf2-core.c - video buffer 2 core framework * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * Marek Szyprowski <m.szyprowski@samsung.com> * * The vb2_thread implementation was based on code from videobuf-dvb.c: * (c) 2004 Gerd Knorr <kraxel@bytesex.org> [SUSE Labs] * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <media/videobuf2-core.h> #include <media/v4l2-mc.h> #include <trace/events/vb2.h> #define PLANE_INDEX_BITS 3 #define PLANE_INDEX_SHIFT (PAGE_SHIFT + PLANE_INDEX_BITS) #define PLANE_INDEX_MASK (BIT_MASK(PLANE_INDEX_BITS) - 1) #define MAX_BUFFER_INDEX BIT_MASK(30 - PLANE_INDEX_SHIFT) #define BUFFER_INDEX_MASK (MAX_BUFFER_INDEX - 1) #if BIT(PLANE_INDEX_BITS) != VIDEO_MAX_PLANES #error PLANE_INDEX_BITS order must be equal to VIDEO_MAX_PLANES #endif static int debug; module_param(debug, int, 0644); #define dprintk(q, level, fmt, arg...) \ do { \ if (debug >= level) \ pr_info("[%s] %s: " fmt, (q)->name, __func__, \ ## arg); \ } while (0) #ifdef CONFIG_VIDEO_ADV_DEBUG /* * If advanced debugging is on, then count how often each op is called * successfully, which can either be per-buffer or per-queue. * * This makes it easy to check that the 'init' and 'cleanup' * (and variations thereof) stay balanced. */ #define log_memop(vb, op) \ dprintk((vb)->vb2_queue, 2, "call_memop(%d, %s)%s\n", \ (vb)->index, #op, \ (vb)->vb2_queue->mem_ops->op ? "" : " (nop)") #define call_memop(vb, op, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ int err; \ \ log_memop(vb, op); \ err = _q->mem_ops->op ? _q->mem_ops->op(args) : 0; \ if (!err) \ (vb)->cnt_mem_ ## op++; \ err; \ }) #define call_ptr_memop(op, vb, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ void *ptr; \ \ log_memop(vb, op); \ ptr = _q->mem_ops->op ? _q->mem_ops->op(vb, args) : NULL; \ if (!IS_ERR_OR_NULL(ptr)) \ (vb)->cnt_mem_ ## op++; \ ptr; \ }) #define call_void_memop(vb, op, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ \ log_memop(vb, op); \ if (_q->mem_ops->op) \ _q->mem_ops->op(args); \ (vb)->cnt_mem_ ## op++; \ }) #define log_qop(q, op) \ dprintk(q, 2, "call_qop(%s)%s\n", #op, \ (q)->ops->op ? "" : " (nop)") #define call_qop(q, op, args...) \ ({ \ int err; \ \ log_qop(q, op); \ err = (q)->ops->op ? (q)->ops->op(args) : 0; \ if (!err) \ (q)->cnt_ ## op++; \ err; \ }) #define call_void_qop(q, op, args...) \ ({ \ log_qop(q, op); \ if ((q)->ops->op) \ (q)->ops->op(args); \ (q)->cnt_ ## op++; \ }) #define log_vb_qop(vb, op, args...) \ dprintk((vb)->vb2_queue, 2, "call_vb_qop(%d, %s)%s\n", \ (vb)->index, #op, \ (vb)->vb2_queue->ops->op ? "" : " (nop)") #define call_vb_qop(vb, op, args...) \ ({ \ int err; \ \ log_vb_qop(vb, op); \ err = (vb)->vb2_queue->ops->op ? \ (vb)->vb2_queue->ops->op(args) : 0; \ if (!err) \ (vb)->cnt_ ## op++; \ err; \ }) #define call_void_vb_qop(vb, op, args...) \ ({ \ log_vb_qop(vb, op); \ if ((vb)->vb2_queue->ops->op) \ (vb)->vb2_queue->ops->op(args); \ (vb)->cnt_ ## op++; \ }) #else #define call_memop(vb, op, args...) \ ((vb)->vb2_queue->mem_ops->op ? \ (vb)->vb2_queue->mem_ops->op(args) : 0) #define call_ptr_memop(op, vb, args...) \ ((vb)->vb2_queue->mem_ops->op ? \ (vb)->vb2_queue->mem_ops->op(vb, args) : NULL) #define call_void_memop(vb, op, args...) \ do { \ if ((vb)->vb2_queue->mem_ops->op) \ (vb)->vb2_queue->mem_ops->op(args); \ } while (0) #define call_qop(q, op, args...) \ ((q)->ops->op ? (q)->ops->op(args) : 0) #define call_void_qop(q, op, args...) \ do { \ if ((q)->ops->op) \ (q)->ops->op(args); \ } while (0) #define call_vb_qop(vb, op, args...) \ ((vb)->vb2_queue->ops->op ? (vb)->vb2_queue->ops->op(args) : 0) #define call_void_vb_qop(vb, op, args...) \ do { \ if ((vb)->vb2_queue->ops->op) \ (vb)->vb2_queue->ops->op(args); \ } while (0) #endif #define call_bufop(q, op, args...) \ ({ \ int ret = 0; \ if (q && q->buf_ops && q->buf_ops->op) \ ret = q->buf_ops->op(args); \ ret; \ }) #define call_void_bufop(q, op, args...) \ ({ \ if (q && q->buf_ops && q->buf_ops->op) \ q->buf_ops->op(args); \ }) static void __vb2_queue_cancel(struct vb2_queue *q); static const char *vb2_state_name(enum vb2_buffer_state s) { static const char * const state_names[] = { [VB2_BUF_STATE_DEQUEUED] = "dequeued", [VB2_BUF_STATE_IN_REQUEST] = "in request", [VB2_BUF_STATE_PREPARING] = "preparing", [VB2_BUF_STATE_QUEUED] = "queued", [VB2_BUF_STATE_ACTIVE] = "active", [VB2_BUF_STATE_DONE] = "done", [VB2_BUF_STATE_ERROR] = "error", }; if ((unsigned int)(s) < ARRAY_SIZE(state_names)) return state_names[s]; return "unknown"; } /* * __vb2_buf_mem_alloc() - allocate video memory for the given buffer */ static int __vb2_buf_mem_alloc(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; void *mem_priv; int plane; int ret = -ENOMEM; /* * Allocate memory for all planes in this buffer * NOTE: mmapped areas should be page aligned */ for (plane = 0; plane < vb->num_planes; ++plane) { /* Memops alloc requires size to be page aligned. */ unsigned long size = PAGE_ALIGN(vb->planes[plane].length); /* Did it wrap around? */ if (size < vb->planes[plane].length) goto free; mem_priv = call_ptr_memop(alloc, vb, q->alloc_devs[plane] ? : q->dev, size); if (IS_ERR_OR_NULL(mem_priv)) { if (mem_priv) ret = PTR_ERR(mem_priv); goto free; } /* Associate allocator private data with this plane */ vb->planes[plane].mem_priv = mem_priv; } return 0; free: /* Free already allocated memory if one of the allocations failed */ for (; plane > 0; --plane) { call_void_memop(vb, put, vb->planes[plane - 1].mem_priv); vb->planes[plane - 1].mem_priv = NULL; } return ret; } /* * __vb2_buf_mem_free() - free memory of the given buffer */ static void __vb2_buf_mem_free(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { call_void_memop(vb, put, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; dprintk(vb->vb2_queue, 3, "freed plane %d of buffer %d\n", plane, vb->index); } } /* * __vb2_buf_userptr_put() - release userspace memory associated with * a USERPTR buffer */ static void __vb2_buf_userptr_put(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].mem_priv) call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; } } /* * __vb2_plane_dmabuf_put() - release memory associated with * a DMABUF shared plane */ static void __vb2_plane_dmabuf_put(struct vb2_buffer *vb, struct vb2_plane *p) { if (!p->mem_priv) return; if (p->dbuf_mapped) call_void_memop(vb, unmap_dmabuf, p->mem_priv); call_void_memop(vb, detach_dmabuf, p->mem_priv); dma_buf_put(p->dbuf); p->mem_priv = NULL; p->dbuf = NULL; p->dbuf_mapped = 0; } /* * __vb2_buf_dmabuf_put() - release memory associated with * a DMABUF shared buffer */ static void __vb2_buf_dmabuf_put(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) __vb2_plane_dmabuf_put(vb, &vb->planes[plane]); } /* * __vb2_buf_mem_prepare() - call ->prepare() on buffer's private memory * to sync caches */ static void __vb2_buf_mem_prepare(struct vb2_buffer *vb) { unsigned int plane; if (vb->synced) return; vb->synced = 1; for (plane = 0; plane < vb->num_planes; ++plane) call_void_memop(vb, prepare, vb->planes[plane].mem_priv); } /* * __vb2_buf_mem_finish() - call ->finish on buffer's private memory * to sync caches */ static void __vb2_buf_mem_finish(struct vb2_buffer *vb) { unsigned int plane; if (!vb->synced) return; vb->synced = 0; for (plane = 0; plane < vb->num_planes; ++plane) call_void_memop(vb, finish, vb->planes[plane].mem_priv); } /* * __setup_offsets() - setup unique offsets ("cookies") for every plane in * the buffer. */ static void __setup_offsets(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; unsigned int plane; unsigned long offset = 0; /* * The offset "cookie" value has the following constraints: * - a buffer can have up to 8 planes. * - v4l2 mem2mem uses bit 30 to distinguish between * OUTPUT (aka "source", bit 30 is 0) and * CAPTURE (aka "destination", bit 30 is 1) buffers. * - must be page aligned * That led to this bit mapping when PAGE_SHIFT = 12: * |30 |29 15|14 12|11 0| * |DST_QUEUE_OFF_BASE|buffer index|plane index| 0 | * where there are 15 bits to store the buffer index. * Depending on PAGE_SHIFT value we can have fewer bits * to store the buffer index. */ offset = vb->index << PLANE_INDEX_SHIFT; for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].m.offset = offset + (plane << PAGE_SHIFT); dprintk(q, 3, "buffer %d, plane %d offset 0x%08lx\n", vb->index, plane, offset); } } static void init_buffer_cache_hints(struct vb2_queue *q, struct vb2_buffer *vb) { /* * DMA exporter should take care of cache syncs, so we can avoid * explicit ->prepare()/->finish() syncs. For other ->memory types * we always need ->prepare() or/and ->finish() cache sync. */ if (q->memory == VB2_MEMORY_DMABUF) { vb->skip_cache_sync_on_finish = 1; vb->skip_cache_sync_on_prepare = 1; return; } /* * ->finish() cache sync can be avoided when queue direction is * TO_DEVICE. */ if (q->dma_dir == DMA_TO_DEVICE) vb->skip_cache_sync_on_finish = 1; } /** * vb2_queue_add_buffer() - add a buffer to a queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: pointer to &struct vb2_buffer to be added to the queue. * @index: index where add vb2_buffer in the queue */ static void vb2_queue_add_buffer(struct vb2_queue *q, struct vb2_buffer *vb, unsigned int index) { WARN_ON(index >= q->max_num_buffers || test_bit(index, q->bufs_bitmap) || vb->vb2_queue); q->bufs[index] = vb; vb->index = index; vb->vb2_queue = q; set_bit(index, q->bufs_bitmap); } /** * vb2_queue_remove_buffer() - remove a buffer from a queue * @vb: pointer to &struct vb2_buffer to be removed from the queue. */ static void vb2_queue_remove_buffer(struct vb2_buffer *vb) { clear_bit(vb->index, vb->vb2_queue->bufs_bitmap); vb->vb2_queue->bufs[vb->index] = NULL; vb->vb2_queue = NULL; } /* * __vb2_queue_alloc() - allocate vb2 buffer structures and (for MMAP type) * video buffer memory for all buffers/planes on the queue and initializes the * queue * @first_index: index of the first created buffer, all newly allocated buffers * have indices in the range [first_index..first_index+count-1] * * Returns the number of buffers successfully allocated. */ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory, unsigned int num_buffers, unsigned int num_planes, const unsigned int plane_sizes[VB2_MAX_PLANES], unsigned int *first_index) { unsigned int buffer, plane; struct vb2_buffer *vb; unsigned long index = q->max_num_buffers; int ret; /* * Ensure that the number of already queue + the number of buffers already * in the queue is below q->max_num_buffers */ num_buffers = min_t(unsigned int, num_buffers, q->max_num_buffers - vb2_get_num_buffers(q)); while (num_buffers) { index = bitmap_find_next_zero_area(q->bufs_bitmap, q->max_num_buffers, 0, num_buffers, 0); if (index < q->max_num_buffers) break; /* Try to find free space for less buffers */ num_buffers--; } /* If there is no space left to allocate buffers return 0 to indicate the error */ if (!num_buffers) { *first_index = 0; return 0; } *first_index = index; for (buffer = 0; buffer < num_buffers; ++buffer) { /* Allocate vb2 buffer structures */ vb = kzalloc(q->buf_struct_size, GFP_KERNEL); if (!vb) { dprintk(q, 1, "memory alloc for buffer struct failed\n"); break; } vb->state = VB2_BUF_STATE_DEQUEUED; vb->num_planes = num_planes; vb->type = q->type; vb->memory = memory; init_buffer_cache_hints(q, vb); for (plane = 0; plane < num_planes; ++plane) { vb->planes[plane].length = plane_sizes[plane]; vb->planes[plane].min_length = plane_sizes[plane]; } vb2_queue_add_buffer(q, vb, index++); call_void_bufop(q, init_buffer, vb); /* Allocate video buffer memory for the MMAP type */ if (memory == VB2_MEMORY_MMAP) { ret = __vb2_buf_mem_alloc(vb); if (ret) { dprintk(q, 1, "failed allocating memory for buffer %d\n", buffer); vb2_queue_remove_buffer(vb); kfree(vb); break; } __setup_offsets(vb); /* * Call the driver-provided buffer initialization * callback, if given. An error in initialization * results in queue setup failure. */ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer %d %p initialization failed\n", buffer, vb); __vb2_buf_mem_free(vb); vb2_queue_remove_buffer(vb); kfree(vb); break; } } } dprintk(q, 3, "allocated %d buffers, %d plane(s) each\n", buffer, num_planes); return buffer; } /* * __vb2_free_mem() - release video buffer memory for a given range of * buffers in a given queue */ static void __vb2_free_mem(struct vb2_queue *q, unsigned int start, unsigned int count) { unsigned int i; struct vb2_buffer *vb; for (i = start; i < start + count; i++) { vb = vb2_get_buffer(q, i); if (!vb) continue; /* Free MMAP buffers or release USERPTR buffers */ if (q->memory == VB2_MEMORY_MMAP) __vb2_buf_mem_free(vb); else if (q->memory == VB2_MEMORY_DMABUF) __vb2_buf_dmabuf_put(vb); else __vb2_buf_userptr_put(vb); } } /* * __vb2_queue_free() - free @count buffers from @start index of the queue - video memory and * related information, if no buffers are left return the queue to an * uninitialized state. Might be called even if the queue has already been freed. */ static void __vb2_queue_free(struct vb2_queue *q, unsigned int start, unsigned int count) { unsigned int i; lockdep_assert_held(&q->mmap_lock); /* Call driver-provided cleanup function for each buffer, if provided */ for (i = start; i < start + count; i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); if (vb && vb->planes[0].mem_priv) call_void_vb_qop(vb, buf_cleanup, vb); } /* Release video buffer memory */ __vb2_free_mem(q, start, count); #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Check that all the calls were balanced during the life-time of this * queue. If not then dump the counters to the kernel log. */ if (vb2_get_num_buffers(q)) { bool unbalanced = q->cnt_start_streaming != q->cnt_stop_streaming || q->cnt_prepare_streaming != q->cnt_unprepare_streaming || q->cnt_wait_prepare != q->cnt_wait_finish; if (unbalanced) { pr_info("unbalanced counters for queue %p:\n", q); if (q->cnt_start_streaming != q->cnt_stop_streaming) pr_info(" setup: %u start_streaming: %u stop_streaming: %u\n", q->cnt_queue_setup, q->cnt_start_streaming, q->cnt_stop_streaming); if (q->cnt_prepare_streaming != q->cnt_unprepare_streaming) pr_info(" prepare_streaming: %u unprepare_streaming: %u\n", q->cnt_prepare_streaming, q->cnt_unprepare_streaming); if (q->cnt_wait_prepare != q->cnt_wait_finish) pr_info(" wait_prepare: %u wait_finish: %u\n", q->cnt_wait_prepare, q->cnt_wait_finish); } q->cnt_queue_setup = 0; q->cnt_wait_prepare = 0; q->cnt_wait_finish = 0; q->cnt_prepare_streaming = 0; q->cnt_start_streaming = 0; q->cnt_stop_streaming = 0; q->cnt_unprepare_streaming = 0; } for (i = start; i < start + count; i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); bool unbalanced; if (!vb) continue; unbalanced = vb->cnt_mem_alloc != vb->cnt_mem_put || vb->cnt_mem_prepare != vb->cnt_mem_finish || vb->cnt_mem_get_userptr != vb->cnt_mem_put_userptr || vb->cnt_mem_attach_dmabuf != vb->cnt_mem_detach_dmabuf || vb->cnt_mem_map_dmabuf != vb->cnt_mem_unmap_dmabuf || vb->cnt_buf_queue != vb->cnt_buf_done || vb->cnt_buf_prepare != vb->cnt_buf_finish || vb->cnt_buf_init != vb->cnt_buf_cleanup; if (unbalanced) { pr_info("unbalanced counters for queue %p, buffer %d:\n", q, i); if (vb->cnt_buf_init != vb->cnt_buf_cleanup) pr_info(" buf_init: %u buf_cleanup: %u\n", vb->cnt_buf_init, vb->cnt_buf_cleanup); if (vb->cnt_buf_prepare != vb->cnt_buf_finish) pr_info(" buf_prepare: %u buf_finish: %u\n", vb->cnt_buf_prepare, vb->cnt_buf_finish); if (vb->cnt_buf_queue != vb->cnt_buf_done) pr_info(" buf_out_validate: %u buf_queue: %u buf_done: %u buf_request_complete: %u\n", vb->cnt_buf_out_validate, vb->cnt_buf_queue, vb->cnt_buf_done, vb->cnt_buf_request_complete); if (vb->cnt_mem_alloc != vb->cnt_mem_put) pr_info(" alloc: %u put: %u\n", vb->cnt_mem_alloc, vb->cnt_mem_put); if (vb->cnt_mem_prepare != vb->cnt_mem_finish) pr_info(" prepare: %u finish: %u\n", vb->cnt_mem_prepare, vb->cnt_mem_finish); if (vb->cnt_mem_get_userptr != vb->cnt_mem_put_userptr) pr_info(" get_userptr: %u put_userptr: %u\n", vb->cnt_mem_get_userptr, vb->cnt_mem_put_userptr); if (vb->cnt_mem_attach_dmabuf != vb->cnt_mem_detach_dmabuf) pr_info(" attach_dmabuf: %u detach_dmabuf: %u\n", vb->cnt_mem_attach_dmabuf, vb->cnt_mem_detach_dmabuf); if (vb->cnt_mem_map_dmabuf != vb->cnt_mem_unmap_dmabuf) pr_info(" map_dmabuf: %u unmap_dmabuf: %u\n", vb->cnt_mem_map_dmabuf, vb->cnt_mem_unmap_dmabuf); pr_info(" get_dmabuf: %u num_users: %u\n", vb->cnt_mem_get_dmabuf, vb->cnt_mem_num_users); } } #endif /* Free vb2 buffers */ for (i = start; i < start + count; i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); if (!vb) continue; vb2_queue_remove_buffer(vb); kfree(vb); } if (!vb2_get_num_buffers(q)) { q->memory = VB2_MEMORY_UNKNOWN; INIT_LIST_HEAD(&q->queued_list); } } bool vb2_buffer_in_use(struct vb2_queue *q, struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { void *mem_priv = vb->planes[plane].mem_priv; /* * If num_users() has not been provided, call_memop * will return 0, apparently nobody cares about this * case anyway. If num_users() returns more than 1, * we are not the only user of the plane's memory. */ if (mem_priv && call_memop(vb, num_users, mem_priv) > 1) return true; } return false; } EXPORT_SYMBOL(vb2_buffer_in_use); /* * __buffers_in_use() - return true if any buffers on the queue are in use and * the queue cannot be freed (by the means of REQBUFS(0)) call */ static bool __buffers_in_use(struct vb2_queue *q) { unsigned int buffer; for (buffer = 0; buffer < q->max_num_buffers; ++buffer) { struct vb2_buffer *vb = vb2_get_buffer(q, buffer); if (!vb) continue; if (vb2_buffer_in_use(q, vb)) return true; } return false; } void vb2_core_querybuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb) { call_void_bufop(q, fill_user_buffer, vb, pb); } EXPORT_SYMBOL_GPL(vb2_core_querybuf); /* * __verify_userptr_ops() - verify that all memory operations required for * USERPTR queue type have been provided */ static int __verify_userptr_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_USERPTR) || !q->mem_ops->get_userptr || !q->mem_ops->put_userptr) return -EINVAL; return 0; } /* * __verify_mmap_ops() - verify that all memory operations required for * MMAP queue type have been provided */ static int __verify_mmap_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_MMAP) || !q->mem_ops->alloc || !q->mem_ops->put || !q->mem_ops->mmap) return -EINVAL; return 0; } /* * __verify_dmabuf_ops() - verify that all memory operations required for * DMABUF queue type have been provided */ static int __verify_dmabuf_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_DMABUF) || !q->mem_ops->attach_dmabuf || !q->mem_ops->detach_dmabuf || !q->mem_ops->map_dmabuf || !q->mem_ops->unmap_dmabuf) return -EINVAL; return 0; } int vb2_verify_memory_type(struct vb2_queue *q, enum vb2_memory memory, unsigned int type) { if (memory != VB2_MEMORY_MMAP && memory != VB2_MEMORY_USERPTR && memory != VB2_MEMORY_DMABUF) { dprintk(q, 1, "unsupported memory type\n"); return -EINVAL; } if (type != q->type) { dprintk(q, 1, "requested type is incorrect\n"); return -EINVAL; } /* * Make sure all the required memory ops for given memory type * are available. */ if (memory == VB2_MEMORY_MMAP && __verify_mmap_ops(q)) { dprintk(q, 1, "MMAP for current setup unsupported\n"); return -EINVAL; } if (memory == VB2_MEMORY_USERPTR && __verify_userptr_ops(q)) { dprintk(q, 1, "USERPTR for current setup unsupported\n"); return -EINVAL; } if (memory == VB2_MEMORY_DMABUF && __verify_dmabuf_ops(q)) { dprintk(q, 1, "DMABUF for current setup unsupported\n"); return -EINVAL; } /* * Place the busy tests at the end: -EBUSY can be ignored when * create_bufs is called with count == 0, but count == 0 should still * do the memory and type validation. */ if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } return 0; } EXPORT_SYMBOL(vb2_verify_memory_type); static void set_queue_coherency(struct vb2_queue *q, bool non_coherent_mem) { q->non_coherent_mem = 0; if (!vb2_queue_allows_cache_hints(q)) return; q->non_coherent_mem = non_coherent_mem; } static bool verify_coherency_flags(struct vb2_queue *q, bool non_coherent_mem) { if (non_coherent_mem != q->non_coherent_mem) { dprintk(q, 1, "memory coherency model mismatch\n"); return false; } return true; } static int vb2_core_allocated_buffers_storage(struct vb2_queue *q) { if (!q->bufs) q->bufs = kcalloc(q->max_num_buffers, sizeof(*q->bufs), GFP_KERNEL); if (!q->bufs) return -ENOMEM; if (!q->bufs_bitmap) q->bufs_bitmap = bitmap_zalloc(q->max_num_buffers, GFP_KERNEL); if (!q->bufs_bitmap) { kfree(q->bufs); q->bufs = NULL; return -ENOMEM; } return 0; } static void vb2_core_free_buffers_storage(struct vb2_queue *q) { kfree(q->bufs); q->bufs = NULL; bitmap_free(q->bufs_bitmap); q->bufs_bitmap = NULL; } int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count) { unsigned int num_buffers, allocated_buffers, num_planes = 0; unsigned int q_num_bufs = vb2_get_num_buffers(q); unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; unsigned int i, first_index; int ret = 0; if (q->streaming) { dprintk(q, 1, "streaming active\n"); return -EBUSY; } if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } if (*count == 0 || q_num_bufs != 0 || (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory) || !verify_coherency_flags(q, non_coherent_mem)) { /* * We already have buffers allocated, so first check if they * are not in use and can be freed. */ mutex_lock(&q->mmap_lock); if (debug && q->memory == VB2_MEMORY_MMAP && __buffers_in_use(q)) dprintk(q, 1, "memory in use, orphaning buffers\n"); /* * Call queue_cancel to clean up any buffers in the * QUEUED state which is possible if buffers were prepared or * queued without ever calling STREAMON. */ __vb2_queue_cancel(q); __vb2_queue_free(q, 0, q->max_num_buffers); mutex_unlock(&q->mmap_lock); q->is_busy = 0; /* * In case of REQBUFS(0) return immediately without calling * driver's queue_setup() callback and allocating resources. */ if (*count == 0) return 0; } /* * Make sure the requested values and current defaults are sane. */ num_buffers = max_t(unsigned int, *count, q->min_reqbufs_allocation); num_buffers = min_t(unsigned int, num_buffers, q->max_num_buffers); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); /* * Set this now to ensure that drivers see the correct q->memory value * in the queue_setup op. */ mutex_lock(&q->mmap_lock); ret = vb2_core_allocated_buffers_storage(q); q->memory = memory; mutex_unlock(&q->mmap_lock); if (ret) return ret; set_queue_coherency(q, non_coherent_mem); /* * Ask the driver how many buffers and planes per buffer it requires. * Driver also sets the size and allocator context for each plane. */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) goto error; /* Check that driver has set sane values */ if (WARN_ON(!num_planes)) { ret = -EINVAL; goto error; } for (i = 0; i < num_planes; i++) if (WARN_ON(!plane_sizes[i])) { ret = -EINVAL; goto error; } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes, &first_index); if (allocated_buffers == 0) { /* There shouldn't be any buffers allocated, so first_index == 0 */ WARN_ON(first_index); dprintk(q, 1, "memory allocation failed\n"); ret = -ENOMEM; goto error; } /* * There is no point in continuing if we can't allocate the minimum * number of buffers needed by this vb2_queue. */ if (allocated_buffers < q->min_reqbufs_allocation) ret = -ENOMEM; /* * Check if driver can handle the allocated number of buffers. */ if (!ret && allocated_buffers < num_buffers) { num_buffers = allocated_buffers; /* * num_planes is set by the previous queue_setup(), but since it * signals to queue_setup() whether it is called from create_bufs() * vs reqbufs() we zero it here to signal that queue_setup() is * called for the reqbufs() case. */ num_planes = 0; ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (!ret && allocated_buffers < num_buffers) ret = -ENOMEM; /* * Either the driver has accepted a smaller number of buffers, * or .queue_setup() returned an error */ } mutex_lock(&q->mmap_lock); if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' * from already queued buffers and it will reset q->memory to * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, first_index, allocated_buffers); mutex_unlock(&q->mmap_lock); return ret; } mutex_unlock(&q->mmap_lock); /* * Return the number of successfully allocated buffers * to the userspace. */ *count = allocated_buffers; q->waiting_for_buffers = !q->is_output; q->is_busy = 1; return 0; error: mutex_lock(&q->mmap_lock); q->memory = VB2_MEMORY_UNKNOWN; mutex_unlock(&q->mmap_lock); vb2_core_free_buffers_storage(q); return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[], unsigned int *first_index) { unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; unsigned int q_num_bufs = vb2_get_num_buffers(q); bool no_previous_buffers = !q_num_bufs; int ret = 0; if (q_num_bufs == q->max_num_buffers) { dprintk(q, 1, "maximum number of buffers already allocated\n"); return -ENOBUFS; } if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); /* * Set this now to ensure that drivers see the correct q->memory * value in the queue_setup op. */ mutex_lock(&q->mmap_lock); ret = vb2_core_allocated_buffers_storage(q); q->memory = memory; mutex_unlock(&q->mmap_lock); if (ret) return ret; q->waiting_for_buffers = !q->is_output; set_queue_coherency(q, non_coherent_mem); } else { if (q->memory != memory) { dprintk(q, 1, "memory model mismatch\n"); return -EINVAL; } if (!verify_coherency_flags(q, non_coherent_mem)) return -EINVAL; } num_buffers = min(*count, q->max_num_buffers - q_num_bufs); if (requested_planes && requested_sizes) { num_planes = requested_planes; memcpy(plane_sizes, requested_sizes, sizeof(plane_sizes)); } /* * Ask the driver, whether the requested number of buffers, planes per * buffer and their sizes are acceptable */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes, first_index); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); ret = -ENOMEM; goto error; } /* * Check if driver can handle the so far allocated number of buffers. */ if (allocated_buffers < num_buffers) { num_buffers = allocated_buffers; /* * num_buffers contains the total number of buffers, that the * queue driver has set up */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (!ret && allocated_buffers < num_buffers) ret = -ENOMEM; /* * Either the driver has accepted a smaller number of buffers, * or .queue_setup() returned an error */ } mutex_lock(&q->mmap_lock); if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' * from already queued buffers and it will reset q->memory to * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, *first_index, allocated_buffers); mutex_unlock(&q->mmap_lock); return -ENOMEM; } mutex_unlock(&q->mmap_lock); /* * Return the number of successfully allocated buffers * to the userspace. */ *count = allocated_buffers; q->is_busy = 1; return 0; error: if (no_previous_buffers) { mutex_lock(&q->mmap_lock); q->memory = VB2_MEMORY_UNKNOWN; mutex_unlock(&q->mmap_lock); } return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; return call_ptr_memop(vaddr, vb, vb->planes[plane_no].mem_priv); } EXPORT_SYMBOL_GPL(vb2_plane_vaddr); void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; return call_ptr_memop(cookie, vb, vb->planes[plane_no].mem_priv); } EXPORT_SYMBOL_GPL(vb2_plane_cookie); void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) { struct vb2_queue *q = vb->vb2_queue; unsigned long flags; if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE)) return; if (WARN_ON(state != VB2_BUF_STATE_DONE && state != VB2_BUF_STATE_ERROR && state != VB2_BUF_STATE_QUEUED)) state = VB2_BUF_STATE_ERROR; #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Although this is not a callback, it still does have to balance * with the buf_queue op. So update this counter manually. */ vb->cnt_buf_done++; #endif dprintk(q, 4, "done processing on buffer %d, state: %s\n", vb->index, vb2_state_name(state)); if (state != VB2_BUF_STATE_QUEUED) __vb2_buf_mem_finish(vb); spin_lock_irqsave(&q->done_lock, flags); if (state == VB2_BUF_STATE_QUEUED) { vb->state = VB2_BUF_STATE_QUEUED; } else { /* Add the buffer to the done buffers list */ list_add_tail(&vb->done_entry, &q->done_list); vb->state = state; } atomic_dec(&q->owned_by_drv_count); if (state != VB2_BUF_STATE_QUEUED && vb->req_obj.req) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } spin_unlock_irqrestore(&q->done_lock, flags); trace_vb2_buf_done(q, vb); switch (state) { case VB2_BUF_STATE_QUEUED: return; default: /* Inform any processes that may be waiting for buffers */ wake_up(&q->done_wq); break; } } EXPORT_SYMBOL_GPL(vb2_buffer_done); void vb2_discard_done(struct vb2_queue *q) { struct vb2_buffer *vb; unsigned long flags; spin_lock_irqsave(&q->done_lock, flags); list_for_each_entry(vb, &q->done_list, done_entry) vb->state = VB2_BUF_STATE_ERROR; spin_unlock_irqrestore(&q->done_lock, flags); } EXPORT_SYMBOL_GPL(vb2_discard_done); /* * __prepare_mmap() - prepare an MMAP buffer */ static int __prepare_mmap(struct vb2_buffer *vb) { int ret = 0; ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, vb->planes); return ret ? ret : call_vb_qop(vb, buf_prepare, vb); } /* * __prepare_userptr() - prepare a USERPTR buffer */ static int __prepare_userptr(struct vb2_buffer *vb) { struct vb2_plane planes[VB2_MAX_PLANES]; struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; int ret = 0; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, planes); if (ret) return ret; for (plane = 0; plane < vb->num_planes; ++plane) { /* Skip the plane if already verified */ if (vb->planes[plane].m.userptr && vb->planes[plane].m.userptr == planes[plane].m.userptr && vb->planes[plane].length == planes[plane].length) continue; dprintk(q, 3, "userspace address for plane %d changed, reacquiring memory\n", plane); /* Check if the provided plane buffer is large enough */ if (planes[plane].length < vb->planes[plane].min_length) { dprintk(q, 1, "provided buffer size %u is less than setup size %u for plane %d\n", planes[plane].length, vb->planes[plane].min_length, plane); ret = -EINVAL; goto err; } /* Release previously acquired memory if present */ if (vb->planes[plane].mem_priv) { if (!reacquired) { reacquired = true; vb->copied_timestamp = 0; call_void_vb_qop(vb, buf_cleanup, vb); } call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); } vb->planes[plane].mem_priv = NULL; vb->planes[plane].bytesused = 0; vb->planes[plane].length = 0; vb->planes[plane].m.userptr = 0; vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ mem_priv = call_ptr_memop(get_userptr, vb, q->alloc_devs[plane] ? : q->dev, planes[plane].m.userptr, planes[plane].length); if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed acquiring userspace memory for plane %d\n", plane); ret = PTR_ERR(mem_priv); goto err; } vb->planes[plane].mem_priv = mem_priv; } /* * Now that everything is in order, copy relevant information * provided by userspace. */ for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].bytesused = planes[plane].bytesused; vb->planes[plane].length = planes[plane].length; vb->planes[plane].m.userptr = planes[plane].m.userptr; vb->planes[plane].data_offset = planes[plane].data_offset; } if (reacquired) { /* * One or more planes changed, so we must call buf_init to do * the driver-specific initialization on the newly acquired * buffer, if provided. */ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer initialization failed\n"); goto err; } } ret = call_vb_qop(vb, buf_prepare, vb); if (ret) { dprintk(q, 1, "buffer preparation failed\n"); call_void_vb_qop(vb, buf_cleanup, vb); goto err; } return 0; err: /* In case of errors, release planes that were already acquired */ for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].mem_priv) call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; vb->planes[plane].m.userptr = 0; vb->planes[plane].length = 0; } return ret; } /* * __prepare_dmabuf() - prepare a DMABUF buffer */ static int __prepare_dmabuf(struct vb2_buffer *vb) { struct vb2_plane planes[VB2_MAX_PLANES]; struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; int ret = 0; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, planes); if (ret) return ret; for (plane = 0; plane < vb->num_planes; ++plane) { struct dma_buf *dbuf = dma_buf_get(planes[plane].m.fd); if (IS_ERR_OR_NULL(dbuf)) { dprintk(q, 1, "invalid dmabuf fd for plane %d\n", plane); ret = -EINVAL; goto err; } /* use DMABUF size if length is not provided */ if (planes[plane].length == 0) planes[plane].length = dbuf->size; if (planes[plane].length < vb->planes[plane].min_length) { dprintk(q, 1, "invalid dmabuf length %u for plane %d, minimum length %u\n", planes[plane].length, plane, vb->planes[plane].min_length); dma_buf_put(dbuf); ret = -EINVAL; goto err; } /* Skip the plane if already verified */ if (dbuf == vb->planes[plane].dbuf && vb->planes[plane].length == planes[plane].length) { dma_buf_put(dbuf); continue; } dprintk(q, 3, "buffer for plane %d changed\n", plane); if (!reacquired) { reacquired = true; vb->copied_timestamp = 0; call_void_vb_qop(vb, buf_cleanup, vb); } /* Release previously acquired memory if present */ __vb2_plane_dmabuf_put(vb, &vb->planes[plane]); vb->planes[plane].bytesused = 0; vb->planes[plane].length = 0; vb->planes[plane].m.fd = 0; vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ mem_priv = call_ptr_memop(attach_dmabuf, vb, q->alloc_devs[plane] ? : q->dev, dbuf, planes[plane].length); if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed to attach dmabuf\n"); ret = PTR_ERR(mem_priv); dma_buf_put(dbuf); goto err; } vb->planes[plane].dbuf = dbuf; vb->planes[plane].mem_priv = mem_priv; } /* * This pins the buffer(s) with dma_buf_map_attachment()). It's done * here instead just before the DMA, while queueing the buffer(s) so * userspace knows sooner rather than later if the dma-buf map fails. */ for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].dbuf_mapped) continue; ret = call_memop(vb, map_dmabuf, vb->planes[plane].mem_priv); if (ret) { dprintk(q, 1, "failed to map dmabuf for plane %d\n", plane); goto err; } vb->planes[plane].dbuf_mapped = 1; } /* * Now that everything is in order, copy relevant information * provided by userspace. */ for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].bytesused = planes[plane].bytesused; vb->planes[plane].length = planes[plane].length; vb->planes[plane].m.fd = planes[plane].m.fd; vb->planes[plane].data_offset = planes[plane].data_offset; } if (reacquired) { /* * Call driver-specific initialization on the newly acquired buffer, * if provided. */ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer initialization failed\n"); goto err; } } ret = call_vb_qop(vb, buf_prepare, vb); if (ret) { dprintk(q, 1, "buffer preparation failed\n"); call_void_vb_qop(vb, buf_cleanup, vb); goto err; } return 0; err: /* In case of errors, release planes that were already acquired */ __vb2_buf_dmabuf_put(vb); return ret; } /* * __enqueue_in_driver() - enqueue a vb2_buffer in driver for processing */ static void __enqueue_in_driver(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; vb->state = VB2_BUF_STATE_ACTIVE; atomic_inc(&q->owned_by_drv_count); trace_vb2_buf_queue(q, vb); call_void_vb_qop(vb, buf_queue, vb); } static int __buf_prepare(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; enum vb2_buffer_state orig_state = vb->state; int ret; if (q->error) { dprintk(q, 1, "fatal error occurred on queue\n"); return -EIO; } if (vb->prepared) return 0; WARN_ON(vb->synced); if (q->is_output) { ret = call_vb_qop(vb, buf_out_validate, vb); if (ret) { dprintk(q, 1, "buffer validation failed\n"); return ret; } } vb->state = VB2_BUF_STATE_PREPARING; switch (q->memory) { case VB2_MEMORY_MMAP: ret = __prepare_mmap(vb); break; case VB2_MEMORY_USERPTR: ret = __prepare_userptr(vb); break; case VB2_MEMORY_DMABUF: ret = __prepare_dmabuf(vb); break; default: WARN(1, "Invalid queue type\n"); ret = -EINVAL; break; } if (ret) { dprintk(q, 1, "buffer preparation failed: %d\n", ret); vb->state = orig_state; return ret; } __vb2_buf_mem_prepare(vb); vb->prepared = 1; vb->state = orig_state; return 0; } static int vb2_req_prepare(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); int ret; if (WARN_ON(vb->state != VB2_BUF_STATE_IN_REQUEST)) return -EINVAL; mutex_lock(vb->vb2_queue->lock); ret = __buf_prepare(vb); mutex_unlock(vb->vb2_queue->lock); return ret; } static void __vb2_dqbuf(struct vb2_buffer *vb); static void vb2_req_unprepare(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); mutex_lock(vb->vb2_queue->lock); __vb2_dqbuf(vb); vb->state = VB2_BUF_STATE_IN_REQUEST; mutex_unlock(vb->vb2_queue->lock); WARN_ON(!vb->req_obj.req); } static void vb2_req_queue(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); int err; mutex_lock(vb->vb2_queue->lock); /* * There is no method to propagate an error from vb2_core_qbuf(), * so if this returns a non-0 value, then WARN. * * The only exception is -EIO which is returned if q->error is * set. We just ignore that, and expect this will be caught the * next time vb2_req_prepare() is called. */ err = vb2_core_qbuf(vb->vb2_queue, vb, NULL, NULL); WARN_ON_ONCE(err && err != -EIO); mutex_unlock(vb->vb2_queue->lock); } static void vb2_req_unbind(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); if (vb->state == VB2_BUF_STATE_IN_REQUEST) call_void_bufop(vb->vb2_queue, init_buffer, vb); } static void vb2_req_release(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); if (vb->state == VB2_BUF_STATE_IN_REQUEST) { vb->state = VB2_BUF_STATE_DEQUEUED; if (vb->request) media_request_put(vb->request); vb->request = NULL; } } static const struct media_request_object_ops vb2_core_req_ops = { .prepare = vb2_req_prepare, .unprepare = vb2_req_unprepare, .queue = vb2_req_queue, .unbind = vb2_req_unbind, .release = vb2_req_release, }; bool vb2_request_object_is_buffer(struct media_request_object *obj) { return obj->ops == &vb2_core_req_ops; } EXPORT_SYMBOL_GPL(vb2_request_object_is_buffer); unsigned int vb2_request_buffer_cnt(struct media_request *req) { struct media_request_object *obj; unsigned long flags; unsigned int buffer_cnt = 0; spin_lock_irqsave(&req->lock, flags); list_for_each_entry(obj, &req->objects, list) if (vb2_request_object_is_buffer(obj)) buffer_cnt++; spin_unlock_irqrestore(&req->lock, flags); return buffer_cnt; } EXPORT_SYMBOL_GPL(vb2_request_buffer_cnt); int vb2_core_prepare_buf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb) { int ret; if (vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } if (vb->prepared) { dprintk(q, 1, "buffer already prepared\n"); return -EINVAL; } ret = __buf_prepare(vb); if (ret) return ret; /* Fill buffer information for the userspace */ call_void_bufop(q, fill_user_buffer, vb, pb); dprintk(q, 2, "prepare of buffer %d succeeded\n", vb->index); return 0; } EXPORT_SYMBOL_GPL(vb2_core_prepare_buf); int vb2_core_remove_bufs(struct vb2_queue *q, unsigned int start, unsigned int count) { unsigned int i, ret = 0; unsigned int q_num_bufs = vb2_get_num_buffers(q); if (count == 0) return 0; if (count > q_num_bufs) return -EINVAL; if (start > q->max_num_buffers - count) return -EINVAL; mutex_lock(&q->mmap_lock); /* Check that all buffers in the range exist */ for (i = start; i < start + count; i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); if (!vb) { ret = -EINVAL; goto unlock; } if (vb->state != VB2_BUF_STATE_DEQUEUED) { ret = -EBUSY; goto unlock; } } __vb2_queue_free(q, start, count); dprintk(q, 2, "%u buffers removed\n", count); unlock: mutex_unlock(&q->mmap_lock); return ret; } EXPORT_SYMBOL_GPL(vb2_core_remove_bufs); /* * vb2_start_streaming() - Attempt to start streaming. * @q: videobuf2 queue * * Attempt to start streaming. When this function is called there must be * at least q->min_queued_buffers queued up (i.e. the minimum * number of buffers required for the DMA engine to function). If the * @start_streaming op fails it is supposed to return all the driver-owned * buffers back to vb2 in state QUEUED. Check if that happened and if * not warn and reclaim them forcefully. */ static int vb2_start_streaming(struct vb2_queue *q) { struct vb2_buffer *vb; int ret; /* * If any buffers were queued before streamon, * we can now pass them to driver for processing. */ list_for_each_entry(vb, &q->queued_list, queued_entry) __enqueue_in_driver(vb); /* Tell the driver to start streaming */ q->start_streaming_called = 1; ret = call_qop(q, start_streaming, q, atomic_read(&q->owned_by_drv_count)); if (!ret) return 0; q->start_streaming_called = 0; dprintk(q, 1, "driver refused to start streaming\n"); /* * If you see this warning, then the driver isn't cleaning up properly * after a failed start_streaming(). See the start_streaming() * documentation in videobuf2-core.h for more information how buffers * should be returned to vb2 in start_streaming(). */ if (WARN_ON(atomic_read(&q->owned_by_drv_count))) { unsigned i; /* * Forcefully reclaim buffers if the driver did not * correctly return them to vb2. */ for (i = 0; i < q->max_num_buffers; ++i) { vb = vb2_get_buffer(q, i); if (!vb) continue; if (vb->state == VB2_BUF_STATE_ACTIVE) vb2_buffer_done(vb, VB2_BUF_STATE_QUEUED); } /* Must be zero now */ WARN_ON(atomic_read(&q->owned_by_drv_count)); } /* * If done_list is not empty, then start_streaming() didn't call * vb2_buffer_done(vb, VB2_BUF_STATE_QUEUED) but STATE_ERROR or * STATE_DONE. */ WARN_ON(!list_empty(&q->done_list)); return ret; } int vb2_core_qbuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb, struct media_request *req) { enum vb2_buffer_state orig_state; int ret; if (q->error) { dprintk(q, 1, "fatal error occurred on queue\n"); return -EIO; } if (!req && vb->state != VB2_BUF_STATE_IN_REQUEST && q->requires_requests) { dprintk(q, 1, "qbuf requires a request\n"); return -EBADR; } if ((req && q->uses_qbuf) || (!req && vb->state != VB2_BUF_STATE_IN_REQUEST && q->uses_requests)) { dprintk(q, 1, "queue in wrong mode (qbuf vs requests)\n"); return -EBUSY; } if (req) { int ret; q->uses_requests = 1; if (vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "buffer %d not in dequeued state\n", vb->index); return -EINVAL; } if (q->is_output && !vb->prepared) { ret = call_vb_qop(vb, buf_out_validate, vb); if (ret) { dprintk(q, 1, "buffer validation failed\n"); return ret; } } media_request_object_init(&vb->req_obj); /* Make sure the request is in a safe state for updating. */ ret = media_request_lock_for_update(req); if (ret) return ret; ret = media_request_object_bind(req, &vb2_core_req_ops, q, true, &vb->req_obj); media_request_unlock_for_update(req); if (ret) return ret; vb->state = VB2_BUF_STATE_IN_REQUEST; /* * Increment the refcount and store the request. * The request refcount is decremented again when the * buffer is dequeued. This is to prevent vb2_buffer_done() * from freeing the request from interrupt context, which can * happen if the application closed the request fd after * queueing the request. */ media_request_get(req); vb->request = req; /* Fill buffer information for the userspace */ if (pb) { call_void_bufop(q, copy_timestamp, vb, pb); call_void_bufop(q, fill_user_buffer, vb, pb); } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); return 0; } if (vb->state != VB2_BUF_STATE_IN_REQUEST) q->uses_qbuf = 1; switch (vb->state) { case VB2_BUF_STATE_DEQUEUED: case VB2_BUF_STATE_IN_REQUEST: if (!vb->prepared) { ret = __buf_prepare(vb); if (ret) return ret; } break; case VB2_BUF_STATE_PREPARING: dprintk(q, 1, "buffer still being prepared\n"); return -EINVAL; default: dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } /* * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; vb->state = VB2_BUF_STATE_QUEUED; if (pb) call_void_bufop(q, copy_timestamp, vb, pb); trace_vb2_qbuf(q, vb); /* * If already streaming, give the buffer to driver for processing. * If not, the buffer will be given to driver on next streamon. */ if (q->start_streaming_called) __enqueue_in_driver(vb); /* Fill buffer information for the userspace */ if (pb) call_void_bufop(q, fill_user_buffer, vb, pb); /* * If streamon has been called, and we haven't yet called * start_streaming() since not enough buffers were queued, and * we now have reached the minimum number of queued buffers, * then we can finally call start_streaming(). */ if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_queued_buffers) { ret = vb2_start_streaming(q); if (ret) { /* * Since vb2_core_qbuf will return with an error, * we should return it to state DEQUEUED since * the error indicates that the buffer wasn't queued. */ list_del(&vb->queued_entry); q->queued_count--; vb->state = orig_state; return ret; } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); return 0; } EXPORT_SYMBOL_GPL(vb2_core_qbuf); /* * __vb2_wait_for_done_vb() - wait for a buffer to become available * for dequeuing * * Will sleep if required for nonblocking == false. */ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) { /* * All operations on vb_done_list are performed under done_lock * spinlock protection. However, buffers may be removed from * it and returned to userspace only while holding both driver's * lock and the done_lock spinlock. Thus we can be sure that as * long as we hold the driver's lock, the list will remain not * empty if list_empty() check succeeds. */ for (;;) { int ret; if (q->waiting_in_dqbuf) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } if (!q->streaming) { dprintk(q, 1, "streaming off, will not wait for buffers\n"); return -EINVAL; } if (q->error) { dprintk(q, 1, "Queue in error state, will not wait for buffers\n"); return -EIO; } if (q->last_buffer_dequeued) { dprintk(q, 3, "last buffer dequeued already, will not wait for buffers\n"); return -EPIPE; } if (!list_empty(&q->done_list)) { /* * Found a buffer that we were waiting for. */ break; } if (nonblocking) { dprintk(q, 3, "nonblocking and no buffers to dequeue, will not wait\n"); return -EAGAIN; } q->waiting_in_dqbuf = 1; /* * We are streaming and blocking, wait for another buffer to * become ready or for streamoff. Driver's lock is released to * allow streamoff or qbuf to be called while waiting. */ call_void_qop(q, wait_prepare, q); /* * All locks have been released, it is safe to sleep now. */ dprintk(q, 3, "will sleep waiting for buffers\n"); ret = wait_event_interruptible(q->done_wq, !list_empty(&q->done_list) || !q->streaming || q->error); /* * We need to reevaluate both conditions again after reacquiring * the locks or return an error if one occurred. */ call_void_qop(q, wait_finish, q); q->waiting_in_dqbuf = 0; if (ret) { dprintk(q, 1, "sleep was interrupted\n"); return ret; } } return 0; } /* * __vb2_get_done_vb() - get a buffer ready for dequeuing * * Will sleep if required for nonblocking == false. */ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, void *pb, int nonblocking) { unsigned long flags; int ret = 0; /* * Wait for at least one buffer to become available on the done_list. */ ret = __vb2_wait_for_done_vb(q, nonblocking); if (ret) return ret; /* * Driver's lock has been held since we last verified that done_list * is not empty, so no need for another list_empty(done_list) check. */ spin_lock_irqsave(&q->done_lock, flags); *vb = list_first_entry(&q->done_list, struct vb2_buffer, done_entry); /* * Only remove the buffer from done_list if all planes can be * handled. Some cases such as V4L2 file I/O and DVB have pb * == NULL; skip the check then as there's nothing to verify. */ if (pb) ret = call_bufop(q, verify_planes_array, *vb, pb); if (!ret) list_del(&(*vb)->done_entry); spin_unlock_irqrestore(&q->done_lock, flags); return ret; } int vb2_wait_for_all_buffers(struct vb2_queue *q) { if (!q->streaming) { dprintk(q, 1, "streaming off, will not wait for buffers\n"); return -EINVAL; } if (q->start_streaming_called) wait_event(q->done_wq, !atomic_read(&q->owned_by_drv_count)); return 0; } EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers); /* * __vb2_dqbuf() - bring back the buffer to the DEQUEUED state */ static void __vb2_dqbuf(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; /* nothing to do if the buffer is already dequeued */ if (vb->state == VB2_BUF_STATE_DEQUEUED) return; vb->state = VB2_BUF_STATE_DEQUEUED; call_void_bufop(q, init_buffer, vb); } int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb, bool nonblocking) { struct vb2_buffer *vb = NULL; int ret; ret = __vb2_get_done_vb(q, &vb, pb, nonblocking); if (ret < 0) return ret; switch (vb->state) { case VB2_BUF_STATE_DONE: dprintk(q, 3, "returning done buffer\n"); break; case VB2_BUF_STATE_ERROR: dprintk(q, 3, "returning done buffer with errors\n"); break; default: dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } call_void_vb_qop(vb, buf_finish, vb); vb->prepared = 0; if (pindex) *pindex = vb->index; /* Fill buffer information for the userspace */ if (pb) call_void_bufop(q, fill_user_buffer, vb, pb); /* Remove from vb2 queue */ list_del(&vb->queued_entry); q->queued_count--; trace_vb2_dqbuf(q, vb); /* go back to dequeued state */ __vb2_dqbuf(vb); if (WARN_ON(vb->req_obj.req)) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } if (vb->request) media_request_put(vb->request); vb->request = NULL; dprintk(q, 2, "dqbuf of buffer %d, state: %s\n", vb->index, vb2_state_name(vb->state)); return 0; } EXPORT_SYMBOL_GPL(vb2_core_dqbuf); /* * __vb2_queue_cancel() - cancel and stop (pause) streaming * * Removes all queued buffers from driver's queue and all buffers queued by * userspace from vb2's queue. Returns to state after reqbufs. */ static void __vb2_queue_cancel(struct vb2_queue *q) { unsigned int i; /* * Tell driver to stop all transactions and release all queued * buffers. */ if (q->start_streaming_called) call_void_qop(q, stop_streaming, q); if (q->streaming) call_void_qop(q, unprepare_streaming, q); /* * If you see this warning, then the driver isn't cleaning up properly * in stop_streaming(). See the stop_streaming() documentation in * videobuf2-core.h for more information how buffers should be returned * to vb2 in stop_streaming(). */ if (WARN_ON(atomic_read(&q->owned_by_drv_count))) { for (i = 0; i < q->max_num_buffers; i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); if (!vb) continue; if (vb->state == VB2_BUF_STATE_ACTIVE) { pr_warn("driver bug: stop_streaming operation is leaving buffer %u in active state\n", vb->index); vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); } } /* Must be zero now */ WARN_ON(atomic_read(&q->owned_by_drv_count)); } q->streaming = 0; q->start_streaming_called = 0; q->queued_count = 0; q->error = 0; q->uses_requests = 0; q->uses_qbuf = 0; /* * Remove all buffers from vb2's list... */ INIT_LIST_HEAD(&q->queued_list); /* * ...and done list; userspace will not receive any buffers it * has not already dequeued before initiating cancel. */ INIT_LIST_HEAD(&q->done_list); atomic_set(&q->owned_by_drv_count, 0); wake_up_all(&q->done_wq); /* * Reinitialize all buffers for next use. * Make sure to call buf_finish for any queued buffers. Normally * that's done in dqbuf, but that's not going to happen when we * cancel the whole queue. Note: this code belongs here, not in * __vb2_dqbuf() since in vb2_core_dqbuf() there is a critical * call to __fill_user_buffer() after buf_finish(). That order can't * be changed, so we can't move the buf_finish() to __vb2_dqbuf(). */ for (i = 0; i < q->max_num_buffers; i++) { struct vb2_buffer *vb; struct media_request *req; vb = vb2_get_buffer(q, i); if (!vb) continue; req = vb->req_obj.req; /* * If a request is associated with this buffer, then * call buf_request_cancel() to give the driver to complete() * related request objects. Otherwise those objects would * never complete. */ if (req) { enum media_request_state state; unsigned long flags; spin_lock_irqsave(&req->lock, flags); state = req->state; spin_unlock_irqrestore(&req->lock, flags); if (state == MEDIA_REQUEST_STATE_QUEUED) call_void_vb_qop(vb, buf_request_complete, vb); } __vb2_buf_mem_finish(vb); if (vb->prepared) { call_void_vb_qop(vb, buf_finish, vb); vb->prepared = 0; } __vb2_dqbuf(vb); if (vb->req_obj.req) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } if (vb->request) media_request_put(vb->request); vb->request = NULL; vb->copied_timestamp = 0; } } int vb2_core_streamon(struct vb2_queue *q, unsigned int type) { unsigned int q_num_bufs = vb2_get_num_buffers(q); int ret; if (type != q->type) { dprintk(q, 1, "invalid stream type\n"); return -EINVAL; } if (q->streaming) { dprintk(q, 3, "already streaming\n"); return 0; } if (!q_num_bufs) { dprintk(q, 1, "no buffers have been allocated\n"); return -EINVAL; } if (q_num_bufs < q->min_queued_buffers) { dprintk(q, 1, "need at least %u queued buffers\n", q->min_queued_buffers); return -EINVAL; } ret = call_qop(q, prepare_streaming, q); if (ret) return ret; /* * Tell driver to start streaming provided sufficient buffers * are available. */ if (q->queued_count >= q->min_queued_buffers) { ret = vb2_start_streaming(q); if (ret) goto unprepare; } q->streaming = 1; dprintk(q, 3, "successful\n"); return 0; unprepare: call_void_qop(q, unprepare_streaming, q); return ret; } EXPORT_SYMBOL_GPL(vb2_core_streamon); void vb2_queue_error(struct vb2_queue *q) { q->error = 1; wake_up_all(&q->done_wq); } EXPORT_SYMBOL_GPL(vb2_queue_error); int vb2_core_streamoff(struct vb2_queue *q, unsigned int type) { if (type != q->type) { dprintk(q, 1, "invalid stream type\n"); return -EINVAL; } /* * Cancel will pause streaming and remove all buffers from the driver * and vb2, effectively returning control over them to userspace. * * Note that we do this even if q->streaming == 0: if you prepare or * queue buffers, and then call streamoff without ever having called * streamon, you would still expect those buffers to be returned to * their normal dequeued state. */ __vb2_queue_cancel(q); q->waiting_for_buffers = !q->is_output; q->last_buffer_dequeued = false; dprintk(q, 3, "successful\n"); return 0; } EXPORT_SYMBOL_GPL(vb2_core_streamoff); /* * __find_plane_by_offset() - find plane associated with the given offset */ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long offset, struct vb2_buffer **vb, unsigned int *plane) { unsigned int buffer; /* * Sanity checks to ensure the lock is held, MEMORY_MMAP is * used and fileio isn't active. */ lockdep_assert_held(&q->mmap_lock); if (q->memory != VB2_MEMORY_MMAP) { dprintk(q, 1, "queue is not currently set up for mmap\n"); return -EINVAL; } if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } /* Get buffer and plane from the offset */ buffer = (offset >> PLANE_INDEX_SHIFT) & BUFFER_INDEX_MASK; *plane = (offset >> PAGE_SHIFT) & PLANE_INDEX_MASK; *vb = vb2_get_buffer(q, buffer); if (!*vb) return -EINVAL; if (*plane >= (*vb)->num_planes) return -EINVAL; return 0; } int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type, struct vb2_buffer *vb, unsigned int plane, unsigned int flags) { struct vb2_plane *vb_plane; int ret; struct dma_buf *dbuf; if (q->memory != VB2_MEMORY_MMAP) { dprintk(q, 1, "queue is not currently set up for mmap\n"); return -EINVAL; } if (!q->mem_ops->get_dmabuf) { dprintk(q, 1, "queue does not support DMA buffer exporting\n"); return -EINVAL; } if (flags & ~(O_CLOEXEC | O_ACCMODE)) { dprintk(q, 1, "queue does support only O_CLOEXEC and access mode flags\n"); return -EINVAL; } if (type != q->type) { dprintk(q, 1, "invalid buffer type\n"); return -EINVAL; } if (plane >= vb->num_planes) { dprintk(q, 1, "buffer plane out of range\n"); return -EINVAL; } if (vb2_fileio_is_active(q)) { dprintk(q, 1, "expbuf: file io in progress\n"); return -EBUSY; } vb_plane = &vb->planes[plane]; dbuf = call_ptr_memop(get_dmabuf, vb, vb_plane->mem_priv, flags & O_ACCMODE); if (IS_ERR_OR_NULL(dbuf)) { dprintk(q, 1, "failed to export buffer %d, plane %d\n", vb->index, plane); return -EINVAL; } ret = dma_buf_fd(dbuf, flags & ~O_ACCMODE); if (ret < 0) { dprintk(q, 3, "buffer %d, plane %d failed to export (%d)\n", vb->index, plane, ret); dma_buf_put(dbuf); return ret; } dprintk(q, 3, "buffer %d, plane %d exported as %d descriptor\n", vb->index, plane, ret); *fd = ret; return 0; } EXPORT_SYMBOL_GPL(vb2_core_expbuf); int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; struct vb2_buffer *vb; unsigned int plane = 0; int ret; unsigned long length; /* * Check memory area access mode. */ if (!(vma->vm_flags & VM_SHARED)) { dprintk(q, 1, "invalid vma flags, VM_SHARED needed\n"); return -EINVAL; } if (q->is_output) { if (!(vma->vm_flags & VM_WRITE)) { dprintk(q, 1, "invalid vma flags, VM_WRITE needed\n"); return -EINVAL; } } else { if (!(vma->vm_flags & VM_READ)) { dprintk(q, 1, "invalid vma flags, VM_READ needed\n"); return -EINVAL; } } mutex_lock(&q->mmap_lock); /* * Find the plane corresponding to the offset passed by userspace. This * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, offset, &vb, &plane); if (ret) goto unlock; /* * MMAP requires page_aligned buffers. * The buffer length was page_aligned at __vb2_buf_mem_alloc(), * so, we need to do the same here. */ length = PAGE_ALIGN(vb->planes[plane].length); if (length < (vma->vm_end - vma->vm_start)) { dprintk(q, 1, "MMAP invalid, as it would overflow buffer length\n"); ret = -EINVAL; goto unlock; } /* * vm_pgoff is treated in V4L2 API as a 'cookie' to select a buffer, * not as a in-buffer offset. We always want to mmap a whole buffer * from its beginning. */ vma->vm_pgoff = 0; ret = call_memop(vb, mmap, vb->planes[plane].mem_priv, vma); unlock: mutex_unlock(&q->mmap_lock); if (ret) return ret; dprintk(q, 3, "buffer %u, plane %d successfully mapped\n", vb->index, plane); return 0; } EXPORT_SYMBOL_GPL(vb2_mmap); #ifndef CONFIG_MMU unsigned long vb2_get_unmapped_area(struct vb2_queue *q, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long offset = pgoff << PAGE_SHIFT; struct vb2_buffer *vb; unsigned int plane; void *vaddr; int ret; mutex_lock(&q->mmap_lock); /* * Find the plane corresponding to the offset passed by userspace. This * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, offset, &vb, &plane); if (ret) goto unlock; vaddr = vb2_plane_vaddr(vb, plane); mutex_unlock(&q->mmap_lock); return vaddr ? (unsigned long)vaddr : -EINVAL; unlock: mutex_unlock(&q->mmap_lock); return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif int vb2_core_queue_init(struct vb2_queue *q) { /* * Sanity check */ /* * For drivers who don't support max_num_buffers ensure * a backward compatibility. */ if (!q->max_num_buffers) q->max_num_buffers = VB2_MAX_FRAME; /* The maximum is limited by offset cookie encoding pattern */ q->max_num_buffers = min_t(unsigned int, q->max_num_buffers, MAX_BUFFER_INDEX); if (WARN_ON(!q) || WARN_ON(!q->ops) || WARN_ON(!q->mem_ops) || WARN_ON(!q->type) || WARN_ON(!q->io_modes) || WARN_ON(!q->ops->queue_setup) || WARN_ON(!q->ops->buf_queue)) return -EINVAL; if (WARN_ON(q->max_num_buffers < VB2_MAX_FRAME) || WARN_ON(q->min_queued_buffers > q->max_num_buffers)) return -EINVAL; if (WARN_ON(q->requires_requests && !q->supports_requests)) return -EINVAL; /* * This combination is not allowed since a non-zero value of * q->min_queued_buffers can cause vb2_core_qbuf() to fail if * it has to call start_streaming(), and the Request API expects * that queueing a request (and thus queueing a buffer contained * in that request) will always succeed. There is no method of * propagating an error back to userspace. */ if (WARN_ON(q->supports_requests && q->min_queued_buffers)) return -EINVAL; /* * The minimum requirement is 2: one buffer is used * by the hardware while the other is being processed by userspace. */ if (q->min_reqbufs_allocation < 2) q->min_reqbufs_allocation = 2; /* * If the driver needs 'min_queued_buffers' in the queue before * calling start_streaming() then the minimum requirement is * 'min_queued_buffers + 1' to keep at least one buffer available * for userspace. */ if (q->min_reqbufs_allocation < q->min_queued_buffers + 1) q->min_reqbufs_allocation = q->min_queued_buffers + 1; if (WARN_ON(q->min_reqbufs_allocation > q->max_num_buffers)) return -EINVAL; INIT_LIST_HEAD(&q->queued_list); INIT_LIST_HEAD(&q->done_list); spin_lock_init(&q->done_lock); mutex_init(&q->mmap_lock); init_waitqueue_head(&q->done_wq); q->memory = VB2_MEMORY_UNKNOWN; if (q->buf_struct_size == 0) q->buf_struct_size = sizeof(struct vb2_buffer); if (q->bidirectional) q->dma_dir = DMA_BIDIRECTIONAL; else q->dma_dir = q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE; if (q->name[0] == '\0') snprintf(q->name, sizeof(q->name), "%s-%p", q->is_output ? "out" : "cap", q); return 0; } EXPORT_SYMBOL_GPL(vb2_core_queue_init); static int __vb2_init_fileio(struct vb2_queue *q, int read); static int __vb2_cleanup_fileio(struct vb2_queue *q); void vb2_core_queue_release(struct vb2_queue *q) { __vb2_cleanup_fileio(q); __vb2_queue_cancel(q); mutex_lock(&q->mmap_lock); __vb2_queue_free(q, 0, q->max_num_buffers); vb2_core_free_buffers_storage(q); q->is_busy = 0; mutex_unlock(&q->mmap_lock); } EXPORT_SYMBOL_GPL(vb2_core_queue_release); __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file, poll_table *wait) { __poll_t req_events = poll_requested_events(wait); struct vb2_buffer *vb = NULL; unsigned long flags; /* * poll_wait() MUST be called on the first invocation on all the * potential queues of interest, even if we are not interested in their * events during this first call. Failure to do so will result in * queue's events to be ignored because the poll_table won't be capable * of adding new wait queues thereafter. */ poll_wait(file, &q->done_wq, wait); if (!q->is_output && !(req_events & (EPOLLIN | EPOLLRDNORM))) return 0; if (q->is_output && !(req_events & (EPOLLOUT | EPOLLWRNORM))) return 0; /* * Start file I/O emulator only if streaming API has not been used yet. */ if (vb2_get_num_buffers(q) == 0 && !vb2_fileio_is_active(q)) { if (!q->is_output && (q->io_modes & VB2_READ) && (req_events & (EPOLLIN | EPOLLRDNORM))) { if (__vb2_init_fileio(q, 1)) return EPOLLERR; } if (q->is_output && (q->io_modes & VB2_WRITE) && (req_events & (EPOLLOUT | EPOLLWRNORM))) { if (__vb2_init_fileio(q, 0)) return EPOLLERR; /* * Write to OUTPUT queue can be done immediately. */ return EPOLLOUT | EPOLLWRNORM; } } /* * There is nothing to wait for if the queue isn't streaming, or if the * error flag is set. */ if (!vb2_is_streaming(q) || q->error) return EPOLLERR; /* * If this quirk is set and QBUF hasn't been called yet then * return EPOLLERR as well. This only affects capture queues, output * queues will always initialize waiting_for_buffers to false. * This quirk is set by V4L2 for backwards compatibility reasons. */ if (q->quirk_poll_must_check_waiting_for_buffers && q->waiting_for_buffers && (req_events & (EPOLLIN | EPOLLRDNORM))) return EPOLLERR; /* * For output streams you can call write() as long as there are fewer * buffers queued than there are buffers available. */ if (q->is_output && q->fileio && q->queued_count < vb2_get_num_buffers(q)) return EPOLLOUT | EPOLLWRNORM; if (list_empty(&q->done_list)) { /* * If the last buffer was dequeued from a capture queue, * return immediately. DQBUF will return -EPIPE. */ if (q->last_buffer_dequeued) return EPOLLIN | EPOLLRDNORM; } /* * Take first buffer available for dequeuing. */ spin_lock_irqsave(&q->done_lock, flags); if (!list_empty(&q->done_list)) vb = list_first_entry(&q->done_list, struct vb2_buffer, done_entry); spin_unlock_irqrestore(&q->done_lock, flags); if (vb && (vb->state == VB2_BUF_STATE_DONE || vb->state == VB2_BUF_STATE_ERROR)) { return (q->is_output) ? EPOLLOUT | EPOLLWRNORM : EPOLLIN | EPOLLRDNORM; } return 0; } EXPORT_SYMBOL_GPL(vb2_core_poll); /* * struct vb2_fileio_buf - buffer context used by file io emulator * * vb2 provides a compatibility layer and emulator of file io (read and * write) calls on top of streaming API. This structure is used for * tracking context related to the buffers. */ struct vb2_fileio_buf { void *vaddr; unsigned int size; unsigned int pos; unsigned int queued:1; }; /* * struct vb2_fileio_data - queue context used by file io emulator * * @cur_index: the index of the buffer currently being read from or * written to. If equal to number of buffers in the vb2_queue * then a new buffer must be dequeued. * @initial_index: in the read() case all buffers are queued up immediately * in __vb2_init_fileio() and __vb2_perform_fileio() just cycles * buffers. However, in the write() case no buffers are initially * queued, instead whenever a buffer is full it is queued up by * __vb2_perform_fileio(). Only once all available buffers have * been queued up will __vb2_perform_fileio() start to dequeue * buffers. This means that initially __vb2_perform_fileio() * needs to know what buffer index to use when it is queuing up * the buffers for the first time. That initial index is stored * in this field. Once it is equal to number of buffers in the * vb2_queue all available buffers have been queued and * __vb2_perform_fileio() should start the normal dequeue/queue cycle. * * vb2 provides a compatibility layer and emulator of file io (read and * write) calls on top of streaming API. For proper operation it required * this structure to save the driver state between each call of the read * or write function. */ struct vb2_fileio_data { unsigned int count; unsigned int type; unsigned int memory; struct vb2_fileio_buf bufs[VB2_MAX_FRAME]; unsigned int cur_index; unsigned int initial_index; unsigned int q_count; unsigned int dq_count; unsigned read_once:1; unsigned write_immediately:1; }; /* * __vb2_init_fileio() - initialize file io emulator * @q: videobuf2 queue * @read: mode selector (1 means read, 0 means write) */ static int __vb2_init_fileio(struct vb2_queue *q, int read) { struct vb2_fileio_data *fileio; struct vb2_buffer *vb; int i, ret; /* * Sanity check */ if (WARN_ON((read && !(q->io_modes & VB2_READ)) || (!read && !(q->io_modes & VB2_WRITE)))) return -EINVAL; /* * Check if device supports mapping buffers to kernel virtual space. */ if (!q->mem_ops->vaddr) return -EBUSY; /* * Check if streaming api has not been already activated. */ if (q->streaming || vb2_get_num_buffers(q) > 0) return -EBUSY; dprintk(q, 3, "setting up file io: mode %s, count %d, read_once %d, write_immediately %d\n", (read) ? "read" : "write", q->min_reqbufs_allocation, q->fileio_read_once, q->fileio_write_immediately); fileio = kzalloc(sizeof(*fileio), GFP_KERNEL); if (fileio == NULL) return -ENOMEM; fileio->read_once = q->fileio_read_once; fileio->write_immediately = q->fileio_write_immediately; /* * Request buffers and use MMAP type to force driver * to allocate buffers by itself. */ fileio->count = q->min_reqbufs_allocation; fileio->memory = VB2_MEMORY_MMAP; fileio->type = q->type; q->fileio = fileio; ret = vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); if (ret) goto err_kfree; /* vb2_fileio_data supports max VB2_MAX_FRAME buffers */ if (fileio->count > VB2_MAX_FRAME) { dprintk(q, 1, "fileio: more than VB2_MAX_FRAME buffers requested\n"); ret = -ENOSPC; goto err_reqbufs; } /* * Userspace can never add or delete buffers later, so there * will never be holes. It is safe to assume that vb2_get_buffer(q, 0) * will always return a valid vb pointer */ vb = vb2_get_buffer(q, 0); /* * Check if plane_count is correct * (multiplane buffers are not supported). */ if (vb->num_planes != 1) { ret = -EBUSY; goto err_reqbufs; } /* * Get kernel address of each buffer. */ for (i = 0; i < vb2_get_num_buffers(q); i++) { /* vb can never be NULL when using fileio. */ vb = vb2_get_buffer(q, i); fileio->bufs[i].vaddr = vb2_plane_vaddr(vb, 0); if (fileio->bufs[i].vaddr == NULL) { ret = -EINVAL; goto err_reqbufs; } fileio->bufs[i].size = vb2_plane_size(vb, 0); } /* * Read mode requires pre queuing of all buffers. */ if (read) { /* * Queue all buffers. */ for (i = 0; i < vb2_get_num_buffers(q); i++) { struct vb2_buffer *vb2 = vb2_get_buffer(q, i); if (!vb2) continue; ret = vb2_core_qbuf(q, vb2, NULL, NULL); if (ret) goto err_reqbufs; fileio->bufs[i].queued = 1; } /* * All buffers have been queued, so mark that by setting * initial_index to the number of buffers in the vb2_queue */ fileio->initial_index = vb2_get_num_buffers(q); fileio->cur_index = fileio->initial_index; } /* * Start streaming. */ ret = vb2_core_streamon(q, q->type); if (ret) goto err_reqbufs; return ret; err_reqbufs: fileio->count = 0; vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); err_kfree: q->fileio = NULL; kfree(fileio); return ret; } /* * __vb2_cleanup_fileio() - free resourced used by file io emulator * @q: videobuf2 queue */ static int __vb2_cleanup_fileio(struct vb2_queue *q) { struct vb2_fileio_data *fileio = q->fileio; if (fileio) { vb2_core_streamoff(q, q->type); q->fileio = NULL; fileio->count = 0; vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); kfree(fileio); dprintk(q, 3, "file io emulator closed\n"); } return 0; } /* * __vb2_perform_fileio() - perform a single file io (read or write) operation * @q: videobuf2 queue * @data: pointed to target userspace buffer * @count: number of bytes to read or write * @ppos: file handle position tracking pointer * @nonblock: mode selector (1 means blocking calls, 0 means nonblocking) * @read: access mode selector (1 means read, 0 means write) */ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_t count, loff_t *ppos, int nonblock, int read) { struct vb2_fileio_data *fileio; struct vb2_fileio_buf *buf; bool is_multiplanar = q->is_multiplanar; /* * When using write() to write data to an output video node the vb2 core * should copy timestamps if V4L2_BUF_FLAG_TIMESTAMP_COPY is set. Nobody * else is able to provide this information with the write() operation. */ bool copy_timestamp = !read && q->copy_timestamp; unsigned index; int ret; dprintk(q, 3, "mode %s, offset %ld, count %zd, %sblocking\n", read ? "read" : "write", (long)*ppos, count, nonblock ? "non" : ""); if (!data) return -EINVAL; if (q->waiting_in_dqbuf) { dprintk(q, 3, "another dup()ped fd is %s\n", read ? "reading" : "writing"); return -EBUSY; } /* * Initialize emulator on first call. */ if (!vb2_fileio_is_active(q)) { ret = __vb2_init_fileio(q, read); dprintk(q, 3, "vb2_init_fileio result: %d\n", ret); if (ret) return ret; } fileio = q->fileio; /* * Check if we need to dequeue the buffer. */ index = fileio->cur_index; if (index >= vb2_get_num_buffers(q)) { struct vb2_buffer *b; /* * Call vb2_dqbuf to get buffer back. */ ret = vb2_core_dqbuf(q, &index, NULL, nonblock); dprintk(q, 5, "vb2_dqbuf result: %d\n", ret); if (ret) return ret; fileio->dq_count += 1; fileio->cur_index = index; buf = &fileio->bufs[index]; /* b can never be NULL when using fileio. */ b = vb2_get_buffer(q, index); /* * Get number of bytes filled by the driver */ buf->pos = 0; buf->queued = 0; buf->size = read ? vb2_get_plane_payload(b, 0) : vb2_plane_size(b, 0); /* Compensate for data_offset on read in the multiplanar case. */ if (is_multiplanar && read && b->planes[0].data_offset < buf->size) { buf->pos = b->planes[0].data_offset; buf->size -= buf->pos; } } else { buf = &fileio->bufs[index]; } /* * Limit count on last few bytes of the buffer. */ if (buf->pos + count > buf->size) { count = buf->size - buf->pos; dprintk(q, 5, "reducing read count: %zd\n", count); } /* * Transfer data to userspace. */ dprintk(q, 3, "copying %zd bytes - buffer %d, offset %u\n", count, index, buf->pos); if (read) ret = copy_to_user(data, buf->vaddr + buf->pos, count); else ret = copy_from_user(buf->vaddr + buf->pos, data, count); if (ret) { dprintk(q, 3, "error copying data\n"); return -EFAULT; } /* * Update counters. */ buf->pos += count; *ppos += count; /* * Queue next buffer if required. */ if (buf->pos == buf->size || (!read && fileio->write_immediately)) { /* b can never be NULL when using fileio. */ struct vb2_buffer *b = vb2_get_buffer(q, index); /* * Check if this is the last buffer to read. */ if (read && fileio->read_once && fileio->dq_count == 1) { dprintk(q, 3, "read limit reached\n"); return __vb2_cleanup_fileio(q); } /* * Call vb2_qbuf and give buffer to the driver. */ b->planes[0].bytesused = buf->pos; if (copy_timestamp) b->timestamp = ktime_get_ns(); ret = vb2_core_qbuf(q, b, NULL, NULL); dprintk(q, 5, "vb2_qbuf result: %d\n", ret); if (ret) return ret; /* * Buffer has been queued, update the status */ buf->pos = 0; buf->queued = 1; buf->size = vb2_plane_size(b, 0); fileio->q_count += 1; /* * If we are queuing up buffers for the first time, then * increase initial_index by one. */ if (fileio->initial_index < vb2_get_num_buffers(q)) fileio->initial_index++; /* * The next buffer to use is either a buffer that's going to be * queued for the first time (initial_index < number of buffers in the vb2_queue) * or it is equal to the number of buffers in the vb2_queue, * meaning that the next time we need to dequeue a buffer since * we've now queued up all the 'first time' buffers. */ fileio->cur_index = fileio->initial_index; } /* * Return proper number of bytes processed. */ if (ret == 0) ret = count; return ret; } size_t vb2_read(struct vb2_queue *q, char __user *data, size_t count, loff_t *ppos, int nonblocking) { return __vb2_perform_fileio(q, data, count, ppos, nonblocking, 1); } EXPORT_SYMBOL_GPL(vb2_read); size_t vb2_write(struct vb2_queue *q, const char __user *data, size_t count, loff_t *ppos, int nonblocking) { return __vb2_perform_fileio(q, (char __user *) data, count, ppos, nonblocking, 0); } EXPORT_SYMBOL_GPL(vb2_write); struct vb2_threadio_data { struct task_struct *thread; vb2_thread_fnc fnc; void *priv; bool stop; }; static int vb2_thread(void *data) { struct vb2_queue *q = data; struct vb2_threadio_data *threadio = q->threadio; bool copy_timestamp = false; unsigned prequeue = 0; unsigned index = 0; int ret = 0; if (q->is_output) { prequeue = vb2_get_num_buffers(q); copy_timestamp = q->copy_timestamp; } set_freezable(); for (;;) { struct vb2_buffer *vb; /* * Call vb2_dqbuf to get buffer back. */ if (prequeue) { vb = vb2_get_buffer(q, index++); if (!vb) continue; prequeue--; } else { call_void_qop(q, wait_finish, q); if (!threadio->stop) ret = vb2_core_dqbuf(q, &index, NULL, 0); call_void_qop(q, wait_prepare, q); dprintk(q, 5, "file io: vb2_dqbuf result: %d\n", ret); if (!ret) vb = vb2_get_buffer(q, index); } if (ret || threadio->stop) break; try_to_freeze(); if (vb->state != VB2_BUF_STATE_ERROR) if (threadio->fnc(vb, threadio->priv)) break; call_void_qop(q, wait_finish, q); if (copy_timestamp) vb->timestamp = ktime_get_ns(); if (!threadio->stop) ret = vb2_core_qbuf(q, vb, NULL, NULL); call_void_qop(q, wait_prepare, q); if (ret || threadio->stop) break; } /* Hmm, linux becomes *very* unhappy without this ... */ while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); } return 0; } /* * This function should not be used for anything else but the videobuf2-dvb * support. If you think you have another good use-case for this, then please * contact the linux-media mailinglist first. */ int vb2_thread_start(struct vb2_queue *q, vb2_thread_fnc fnc, void *priv, const char *thread_name) { struct vb2_threadio_data *threadio; int ret = 0; if (q->threadio) return -EBUSY; if (vb2_is_busy(q)) return -EBUSY; if (WARN_ON(q->fileio)) return -EBUSY; threadio = kzalloc(sizeof(*threadio), GFP_KERNEL); if (threadio == NULL) return -ENOMEM; threadio->fnc = fnc; threadio->priv = priv; ret = __vb2_init_fileio(q, !q->is_output); dprintk(q, 3, "file io: vb2_init_fileio result: %d\n", ret); if (ret) goto nomem; q->threadio = threadio; threadio->thread = kthread_run(vb2_thread, q, "vb2-%s", thread_name); if (IS_ERR(threadio->thread)) { ret = PTR_ERR(threadio->thread); threadio->thread = NULL; goto nothread; } return 0; nothread: __vb2_cleanup_fileio(q); nomem: kfree(threadio); return ret; } EXPORT_SYMBOL_GPL(vb2_thread_start); int vb2_thread_stop(struct vb2_queue *q) { struct vb2_threadio_data *threadio = q->threadio; int err; if (threadio == NULL) return 0; threadio->stop = true; /* Wake up all pending sleeps in the thread */ vb2_queue_error(q); err = kthread_stop(threadio->thread); __vb2_cleanup_fileio(q); threadio->thread = NULL; kfree(threadio); q->threadio = NULL; return err; } EXPORT_SYMBOL_GPL(vb2_thread_stop); MODULE_DESCRIPTION("Media buffer core framework"); MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>, Marek Szyprowski"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(DMA_BUF);
2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 // SPDX-License-Identifier: GPL-2.0-or-later /* cx25840 - Conexant CX25840 audio/video decoder driver * * Copyright (C) 2004 Ulf Eklund * * Based on the saa7115 driver and on the first version of Chris Kennedy's * cx25840 driver. * * Changes by Tyler Trafford <tatrafford@comcast.net> * - cleanup/rewrite for V4L2 API (2005) * * VBI support by Hans Verkuil <hverkuil@xs4all.nl>. * * NTSC sliced VBI support by Christopher Neufeld <television@cneufeld.ca> * with additional fixes by Hans Verkuil <hverkuil@xs4all.nl>. * * CX23885 support by Steven Toth <stoth@linuxtv.org>. * * CX2388[578] IRQ handling, IO Pin mux configuration and other small fixes are * Copyright (C) 2010 Andy Walls <awalls@md.metrocast.net> * * CX23888 DIF support for the HVR1850 * Copyright (C) 2011 Steven Toth <stoth@kernellabs.com> * * CX2584x pin to pad mapping and output format configuration support are * Copyright (C) 2011 Maciej S. Szmigiero <mail@maciej.szmigiero.name> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/videodev2.h> #include <linux/i2c.h> #include <linux/delay.h> #include <linux/math64.h> #include <media/v4l2-common.h> #include <media/drv-intf/cx25840.h> #include "cx25840-core.h" MODULE_DESCRIPTION("Conexant CX25840 audio/video decoder driver"); MODULE_AUTHOR("Ulf Eklund, Chris Kennedy, Hans Verkuil, Tyler Trafford"); MODULE_LICENSE("GPL"); #define CX25840_VID_INT_STAT_REG 0x410 #define CX25840_VID_INT_STAT_BITS 0x0000ffff #define CX25840_VID_INT_MASK_BITS 0xffff0000 #define CX25840_VID_INT_MASK_SHFT 16 #define CX25840_VID_INT_MASK_REG 0x412 #define CX23885_AUD_MC_INT_MASK_REG 0x80c #define CX23885_AUD_MC_INT_STAT_BITS 0xffff0000 #define CX23885_AUD_MC_INT_CTRL_BITS 0x0000ffff #define CX23885_AUD_MC_INT_STAT_SHFT 16 #define CX25840_AUD_INT_CTRL_REG 0x812 #define CX25840_AUD_INT_STAT_REG 0x813 #define CX23885_PIN_CTRL_IRQ_REG 0x123 #define CX23885_PIN_CTRL_IRQ_IR_STAT 0x40 #define CX23885_PIN_CTRL_IRQ_AUD_STAT 0x20 #define CX23885_PIN_CTRL_IRQ_VID_STAT 0x10 #define CX25840_IR_STATS_REG 0x210 #define CX25840_IR_IRQEN_REG 0x214 static int cx25840_debug; module_param_named(debug, cx25840_debug, int, 0644); MODULE_PARM_DESC(debug, "Debugging messages [0=Off (default) 1=On]"); /* ----------------------------------------------------------------------- */ static void cx23888_std_setup(struct i2c_client *client); int cx25840_write(struct i2c_client *client, u16 addr, u8 value) { u8 buffer[3]; buffer[0] = addr >> 8; buffer[1] = addr & 0xff; buffer[2] = value; return i2c_master_send(client, buffer, 3); } int cx25840_write4(struct i2c_client *client, u16 addr, u32 value) { u8 buffer[6]; buffer[0] = addr >> 8; buffer[1] = addr & 0xff; buffer[2] = value & 0xff; buffer[3] = (value >> 8) & 0xff; buffer[4] = (value >> 16) & 0xff; buffer[5] = value >> 24; return i2c_master_send(client, buffer, 6); } u8 cx25840_read(struct i2c_client *client, u16 addr) { struct i2c_msg msgs[2]; u8 tx_buf[2], rx_buf[1]; /* Write register address */ tx_buf[0] = addr >> 8; tx_buf[1] = addr & 0xff; msgs[0].addr = client->addr; msgs[0].flags = 0; msgs[0].len = 2; msgs[0].buf = (char *)tx_buf; /* Read data from register */ msgs[1].addr = client->addr; msgs[1].flags = I2C_M_RD; msgs[1].len = 1; msgs[1].buf = (char *)rx_buf; if (i2c_transfer(client->adapter, msgs, 2) < 2) return 0; return rx_buf[0]; } u32 cx25840_read4(struct i2c_client *client, u16 addr) { struct i2c_msg msgs[2]; u8 tx_buf[2], rx_buf[4]; /* Write register address */ tx_buf[0] = addr >> 8; tx_buf[1] = addr & 0xff; msgs[0].addr = client->addr; msgs[0].flags = 0; msgs[0].len = 2; msgs[0].buf = (char *)tx_buf; /* Read data from registers */ msgs[1].addr = client->addr; msgs[1].flags = I2C_M_RD; msgs[1].len = 4; msgs[1].buf = (char *)rx_buf; if (i2c_transfer(client->adapter, msgs, 2) < 2) return 0; return (rx_buf[3] << 24) | (rx_buf[2] << 16) | (rx_buf[1] << 8) | rx_buf[0]; } int cx25840_and_or(struct i2c_client *client, u16 addr, unsigned int and_mask, u8 or_value) { return cx25840_write(client, addr, (cx25840_read(client, addr) & and_mask) | or_value); } int cx25840_and_or4(struct i2c_client *client, u16 addr, u32 and_mask, u32 or_value) { return cx25840_write4(client, addr, (cx25840_read4(client, addr) & and_mask) | or_value); } /* ----------------------------------------------------------------------- */ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_input, enum cx25840_audio_input aud_input); /* ----------------------------------------------------------------------- */ static int cx23885_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); int i; u32 pin_ctrl; u8 gpio_oe, gpio_data, strength; pin_ctrl = cx25840_read4(client, 0x120); gpio_oe = cx25840_read(client, 0x160); gpio_data = cx25840_read(client, 0x164); for (i = 0; i < n; i++) { strength = p[i].strength; if (strength > CX25840_PIN_DRIVE_FAST) strength = CX25840_PIN_DRIVE_FAST; switch (p[i].pin) { case CX23885_PIN_IRQ_N_GPIO16: if (p[i].function != CX23885_PAD_IRQ_N) { /* GPIO16 */ pin_ctrl &= ~(0x1 << 25); } else { /* IRQ_N */ if (p[i].flags & (BIT(V4L2_SUBDEV_IO_PIN_DISABLE) | BIT(V4L2_SUBDEV_IO_PIN_INPUT))) { pin_ctrl &= ~(0x1 << 25); } else { pin_ctrl |= (0x1 << 25); } if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)) { pin_ctrl &= ~(0x1 << 24); } else { pin_ctrl |= (0x1 << 24); } } break; case CX23885_PIN_IR_RX_GPIO19: if (p[i].function != CX23885_PAD_GPIO19) { /* IR_RX */ gpio_oe |= (0x1 << 0); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO19 */ gpio_oe &= ~(0x1 << 0); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 0); gpio_data |= ((p[i].value & 0x1) << 0); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_IR_TX_GPIO20: if (p[i].function != CX23885_PAD_GPIO20) { /* IR_TX */ gpio_oe |= (0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pin_ctrl &= ~(0x1 << 10); else pin_ctrl |= (0x1 << 10); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO20 */ gpio_oe &= ~(0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 1); gpio_data |= ((p[i].value & 0x1) << 1); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_SDAT_GPIO21: if (p[i].function != CX23885_PAD_GPIO21) { /* I2S_SDAT */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 2); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO21 */ gpio_oe &= ~(0x1 << 2); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 2); gpio_data |= ((p[i].value & 0x1) << 2); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_WCLK_GPIO22: if (p[i].function != CX23885_PAD_GPIO22) { /* I2S_WCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 3); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO22 */ gpio_oe &= ~(0x1 << 3); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 3); gpio_data |= ((p[i].value & 0x1) << 3); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_BCLK_GPIO23: if (p[i].function != CX23885_PAD_GPIO23) { /* I2S_BCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 4); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO23 */ gpio_oe &= ~(0x1 << 4); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 4); gpio_data |= ((p[i].value & 0x1) << 4); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; } } cx25840_write(client, 0x164, gpio_data); cx25840_write(client, 0x160, gpio_oe); cx25840_write4(client, 0x120, pin_ctrl); return 0; } static u8 cx25840_function_to_pad(struct i2c_client *client, u8 function) { if (function > CX25840_PAD_VRESET) { v4l_err(client, "invalid function %u, assuming default\n", (unsigned int)function); return 0; } return function; } static void cx25840_set_invert(u8 *pinctrl3, u8 *voutctrl4, u8 function, u8 pin, bool invert) { switch (function) { case CX25840_PAD_IRQ_N: if (invert) *pinctrl3 &= ~2; else *pinctrl3 |= 2; break; case CX25840_PAD_ACTIVE: if (invert) *voutctrl4 |= BIT(2); else *voutctrl4 &= ~BIT(2); break; case CX25840_PAD_VACTIVE: if (invert) *voutctrl4 |= BIT(5); else *voutctrl4 &= ~BIT(5); break; case CX25840_PAD_CBFLAG: if (invert) *voutctrl4 |= BIT(4); else *voutctrl4 &= ~BIT(4); break; case CX25840_PAD_VRESET: if (invert) *voutctrl4 |= BIT(0); else *voutctrl4 &= ~BIT(0); break; } if (function != CX25840_PAD_DEFAULT) return; switch (pin) { case CX25840_PIN_DVALID_PRGM0: if (invert) *voutctrl4 |= BIT(6); else *voutctrl4 &= ~BIT(6); break; case CX25840_PIN_HRESET_PRGM2: if (invert) *voutctrl4 |= BIT(1); else *voutctrl4 &= ~BIT(1); break; } } static int cx25840_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); unsigned int i; u8 pinctrl[6], pinconf[10], voutctrl4; for (i = 0; i < 6; i++) pinctrl[i] = cx25840_read(client, 0x114 + i); for (i = 0; i < 10; i++) pinconf[i] = cx25840_read(client, 0x11c + i); voutctrl4 = cx25840_read(client, 0x407); for (i = 0; i < n; i++) { u8 strength = p[i].strength; if (strength != CX25840_PIN_DRIVE_SLOW && strength != CX25840_PIN_DRIVE_MEDIUM && strength != CX25840_PIN_DRIVE_FAST) { v4l_err(client, "invalid drive speed for pin %u (%u), assuming fast\n", (unsigned int)p[i].pin, (unsigned int)strength); strength = CX25840_PIN_DRIVE_FAST; } switch (p[i].pin) { case CX25840_PIN_DVALID_PRGM0: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[0] &= ~BIT(6); else pinctrl[0] |= BIT(6); pinconf[3] &= 0xf0; pinconf[3] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_DVALID_PRGM0, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_HRESET_PRGM2: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[1] &= ~BIT(0); else pinctrl[1] |= BIT(0); pinconf[4] &= 0xf0; pinconf[4] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_HRESET_PRGM2, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_PLL_CLK_PRGM7: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[2] &= ~BIT(2); else pinctrl[2] |= BIT(2); switch (p[i].function) { case CX25840_PAD_XTI_X5_DLL: pinconf[6] = 0; break; case CX25840_PAD_AUX_PLL: pinconf[6] = 1; break; case CX25840_PAD_VID_PLL: pinconf[6] = 5; break; case CX25840_PAD_XTI: pinconf[6] = 2; break; default: pinconf[6] = 3; pinconf[6] |= cx25840_function_to_pad(client, p[i].function) << 4; } break; default: v4l_err(client, "invalid or unsupported pin %u\n", (unsigned int)p[i].pin); break; } } cx25840_write(client, 0x407, voutctrl4); for (i = 0; i < 6; i++) cx25840_write(client, 0x114 + i, pinctrl[i]); for (i = 0; i < 10; i++) cx25840_write(client, 0x11c + i, pinconf[i]); return 0; } static int common_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *pincfg) { struct cx25840_state *state = to_state(sd); if (is_cx2388x(state)) return cx23885_s_io_pin_config(sd, n, pincfg); else if (is_cx2584x(state)) return cx25840_s_io_pin_config(sd, n, pincfg); return 0; } /* ----------------------------------------------------------------------- */ static void init_dll1(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 1 (ADC DLL). */ cx25840_write(client, 0x159, 0x23); cx25840_write(client, 0x15a, 0x87); cx25840_write(client, 0x15b, 0x06); udelay(10); cx25840_write(client, 0x159, 0xe1); udelay(10); cx25840_write(client, 0x15a, 0x86); cx25840_write(client, 0x159, 0xe0); cx25840_write(client, 0x159, 0xe1); cx25840_write(client, 0x15b, 0x10); } static void init_dll2(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 2 (ADC DLL). */ cx25840_write(client, 0x15d, 0xe3); cx25840_write(client, 0x15e, 0x86); cx25840_write(client, 0x15f, 0x06); udelay(10); cx25840_write(client, 0x15d, 0xe1); cx25840_write(client, 0x15d, 0xe0); cx25840_write(client, 0x15d, 0xe1); } static void cx25836_initialize(struct i2c_client *client) { /* *reset configuration is described on page 3-77 * of the CX25836 datasheet */ /* 2. */ cx25840_and_or(client, 0x000, ~0x01, 0x01); cx25840_and_or(client, 0x000, ~0x01, 0x00); /* 3a. */ cx25840_and_or(client, 0x15a, ~0x70, 0x00); /* 3b. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x06); /* 3c. */ cx25840_and_or(client, 0x159, ~0x02, 0x02); /* 3d. */ udelay(10); /* 3e. */ cx25840_and_or(client, 0x159, ~0x02, 0x00); /* 3f. */ cx25840_and_or(client, 0x159, ~0xc0, 0xc0); /* 3g. */ cx25840_and_or(client, 0x159, ~0x01, 0x00); cx25840_and_or(client, 0x159, ~0x01, 0x01); /* 3h. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x10); } static void cx25840_work_handler(struct work_struct *work) { struct cx25840_state *state = container_of(work, struct cx25840_state, fw_work); cx25840_loadfw(state->c); wake_up(&state->fw_wait); } #define CX25840_VCONFIG_SET_BIT(state, opt_msk, voc, idx, bit, oneval) \ do { \ if ((state)->vid_config & (opt_msk)) { \ if (((state)->vid_config & (opt_msk)) == \ (oneval)) \ (voc)[idx] |= BIT(bit); \ else \ (voc)[idx] &= ~BIT(bit); \ } \ } while (0) /* apply current vconfig to hardware regs */ static void cx25840_vconfig_apply(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 voutctrl[3]; unsigned int i; for (i = 0; i < 3; i++) voutctrl[i] = cx25840_read(client, 0x404 + i); if (state->vid_config & CX25840_VCONFIG_FMT_MASK) voutctrl[0] &= ~3; switch (state->vid_config & CX25840_VCONFIG_FMT_MASK) { case CX25840_VCONFIG_FMT_BT656: voutctrl[0] |= 1; break; case CX25840_VCONFIG_FMT_VIP11: voutctrl[0] |= 2; break; case CX25840_VCONFIG_FMT_VIP2: voutctrl[0] |= 3; break; case CX25840_VCONFIG_FMT_BT601: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_RES_MASK, voutctrl, 0, 2, CX25840_VCONFIG_RES_10BIT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VBIRAW_MASK, voutctrl, 0, 3, CX25840_VCONFIG_VBIRAW_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ANCDATA_MASK, voutctrl, 0, 4, CX25840_VCONFIG_ANCDATA_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_TASKBIT_MASK, voutctrl, 0, 5, CX25840_VCONFIG_TASKBIT_ONE); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ACTIVE_MASK, voutctrl, 1, 2, CX25840_VCONFIG_ACTIVE_HORIZONTAL); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VALID_MASK, voutctrl, 1, 3, CX25840_VCONFIG_VALID_ANDACTIVE); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_HRESETW_MASK, voutctrl, 1, 4, CX25840_VCONFIG_HRESETW_PIXCLK); if (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) voutctrl[1] &= ~(3 << 6); switch (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) { case CX25840_VCONFIG_CLKGATE_VALID: voutctrl[1] |= 2; break; case CX25840_VCONFIG_CLKGATE_VALIDACTIVE: voutctrl[1] |= 3; break; case CX25840_VCONFIG_CLKGATE_NONE: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_DCMODE_MASK, voutctrl, 2, 0, CX25840_VCONFIG_DCMODE_BYTES); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_IDID0S_MASK, voutctrl, 2, 1, CX25840_VCONFIG_IDID0S_LINECNT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VIPCLAMP_MASK, voutctrl, 2, 4, CX25840_VCONFIG_VIPCLAMP_ENABLED); for (i = 0; i < 3; i++) cx25840_write(client, 0x404 + i, voutctrl[i]); } static void cx25840_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* datasheet startup in numbered steps, refer to page 3-77 */ /* 2. */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* * The default of this register should be 4, but I get 0 instead. * Set this register to 4 manually. */ cx25840_write(client, 0x000, 0x04); /* 3. */ init_dll1(client); init_dll2(client); cx25840_write(client, 0x136, 0x0a); /* 4. */ cx25840_write(client, 0x13c, 0x01); cx25840_write(client, 0x13c, 0x00); /* 5. */ /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* 6. */ cx25840_write(client, 0x115, 0x8c); cx25840_write(client, 0x116, 0x07); cx25840_write(client, 0x118, 0x02); /* 7. */ cx25840_write(client, 0x4a5, 0x80); cx25840_write(client, 0x4a5, 0x00); cx25840_write(client, 0x402, 0x00); /* 8. */ cx25840_and_or(client, 0x401, ~0x18, 0); cx25840_and_or(client, 0x4a2, ~0x10, 0x10); /* steps 8c and 8d are done in change_input() */ /* 10. */ cx25840_write(client, 0x8d3, 0x1f); cx25840_write(client, 0x8e3, 0x03); cx25840_std_setup(client); /* trial and error says these are needed to get audio */ cx25840_write(client, 0x914, 0xa0); cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); /* stereo preferred */ cx25840_write(client, 0x809, 0x04); /* AC97 shift */ cx25840_write(client, 0x8cf, 0x0f); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); if (state->generic_mode) cx25840_vconfig_apply(client); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); } static void cx23885_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u32 clk_freq = 0; struct workqueue_struct *q; /* cx23885 sets hostdata to clk_freq pointer */ if (v4l2_get_subdev_hostdata(&state->sd)) clk_freq = *((u32 *)v4l2_get_subdev_hostdata(&state->sd)); /* * Come out of digital power down * The CX23888, at least, needs this, otherwise registers aside from * 0x0-0x2 can't be read or written. */ cx25840_write(client, 0x000, 0); /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? */ cx25840_write(client, 0x398, 0); /* * Trust the default xtal, no division * '885: 28.636363... MHz * '887: 25.000000 MHz * '888: 50.000000 MHz */ cx25840_write(client, 0x2, 0x76); /* Power up all the PLL's and DLL */ cx25840_write(client, 0x1, 0x40); /* Sys PLL */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0xb + 0xe8ba26/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25Mhz xtal */ ; /* nothing to do */ } else { /* HVR1850 or 50MHz xtal */ cx25840_write(client, 0x2, 0x71); } cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); cx25840_write4(client, 0x404, 0x0010253e); cx25840_write4(client, 0x42c, 0x42600000); cx25840_write4(client, 0x44c, 0x161f1000); break; case CX23887_AV: /* * 25.0 MHz * (0x16 + 0x1d1744c/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); break; case CX23885_AV: default: /* * 28.636363 MHz * (0x14 + 0x0/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x00000000); cx25840_write4(client, 0x118, 0x00000414); break; } /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* * Vid PLL * Setup for a BT.656 pixel clock of 13.5 Mpixels/second * * 28.636363 MHz * (0xf + 0x02be2c9/0x2000000)/4 = 8 * 13.5 MHz * 432.0 MHz before post divide */ /* HVR1850 */ switch (state->id) { case CX23888_AV: if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25MHz xtal */ cx25840_write4(client, 0x10c, 0x01b6db7b); cx25840_write4(client, 0x108, 0x00000512); } else { /* 888/HVR1250 or 50MHz xtal */ cx25840_write4(client, 0x10c, 0x13333333); cx25840_write4(client, 0x108, 0x00000515); } break; default: cx25840_write4(client, 0x10c, 0x002be2c9); cx25840_write4(client, 0x108, 0x0000040f); } /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ if (is_cx23888(state)) cx25840_write4(client, 0x418, 0x1d008282); else cx25840_write4(client, 0x420, 0x3d008282); /* * Aux PLL * Initial setup for audio sample clock: * 48 ksps, 16 bits/sample, x160 multiplier = 122.88 MHz * Initial I2S output/master clock(?): * 48 ksps, 16 bits/sample, x16 multiplier = 12.288 MHz */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0x7 + 0x0bedfa4/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ /* HVR1850 or 50MHz xtal or 25MHz xtal */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23887_AV: /* * 25.0 MHz * (0xe + 0x17dbf48/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23885_AV: default: /* * 28.636363 MHz * (0xc + 0x1bf0c9e/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x01bf0c9e); cx25840_write4(client, 0x110, 0x000a030c); break; } /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* Drive GPIO2 direction and values for HVR1700 * where an onboard mux selects the output of demodulator * vs the 417. Failure to set this results in no DTV. * It's safe to set this across all Hauppauge boards * currently, regardless of the board type. */ cx25840_write(client, 0x160, 0x1d); cx25840_write(client, 0x164, 0x00); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* * Call the cx23888 specific std setup func, we no longer rely on * the generic cx24840 func. */ if (is_cx23888(state)) cx23888_std_setup(client); else cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* Disable and clear video interrupts - we don't use them */ cx25840_write4(client, CX25840_VID_INT_STAT_REG, 0xffffffff); /* Disable and clear audio interrupts - we don't use them */ cx25840_write(client, CX25840_AUD_INT_CTRL_REG, 0xff); cx25840_write(client, CX25840_AUD_INT_STAT_REG, 0xff); /* CC raw enable */ /* * - VIP 1.1 control codes - 10bit, blue field enable. * - enable raw data during vertical blanking. * - enable ancillary Data insertion for 656 or VIP. */ cx25840_write4(client, 0x404, 0x0010253e); /* CC on - VBI_LINE_CTRL3, FLD_VBI_MD_LINE12 */ cx25840_write(client, state->vbi_regs_offset + 0x42f, 0x66); /* HVR-1250 / HVR1850 DIF related */ /* Power everything up */ cx25840_write4(client, 0x130, 0x0); /* SRC_COMB_CFG */ if (is_cx23888(state)) cx25840_write4(client, 0x454, 0x6628021F); else cx25840_write4(client, 0x478, 0x6628021F); /* AFE_CLK_OUT_CTRL - Select the clock output source as output */ cx25840_write4(client, 0x144, 0x5); /* I2C_OUT_CTL - I2S output configuration as * Master, Sony, Left justified, left sample on WS=1 */ cx25840_write4(client, 0x918, 0x1a0); /* AFE_DIAG_CTRL1 */ cx25840_write4(client, 0x134, 0x000a1800); /* AFE_DIAG_CTRL3 - Inverted Polarity for Audio and Video */ cx25840_write4(client, 0x13c, 0x00310000); } /* ----------------------------------------------------------------------- */ static void cx231xx_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? */ cx25840_write(client, 0x398, 0); /* Trust the default xtal, no division */ /* This changes for the cx23888 products */ cx25840_write(client, 0x2, 0x76); /* Bring down the regulator for AUX clk */ cx25840_write(client, 0x1, 0x40); /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ cx25840_write4(client, 0x420, 0x3d008282); /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* CC raw enable */ cx25840_write(client, 0x404, 0x0b); /* CC on */ cx25840_write(client, 0x42f, 0x66); cx25840_write4(client, 0x474, 0x1e1e601a); } /* ----------------------------------------------------------------------- */ void cx25840_std_setup(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; int hblank, hactive, burst, vblank, vactive, sc; int vblank656, src_decimation; int luma_lpf, uv_lpf, comb; u32 pll_int, pll_frac, pll_post; /* datasheet startup, step 8d */ if (std & ~V4L2_STD_NTSC) cx25840_write(client, 0x49f, 0x11); else cx25840_write(client, 0x49f, 0x14); /* generic mode uses the values that the chip autoconfig would set */ if (std & V4L2_STD_625_50) { hblank = 132; hactive = 720; burst = 93; if (state->generic_mode) { vblank = 34; vactive = 576; vblank656 = 38; } else { vblank = 36; vactive = 580; vblank656 = 40; } src_decimation = 0x21f; luma_lpf = 2; if (std & V4L2_STD_SECAM) { uv_lpf = 0; comb = 0; sc = 0x0a425f; } else if (std == V4L2_STD_PAL_Nc) { if (state->generic_mode) { burst = 95; luma_lpf = 1; } uv_lpf = 1; comb = 0x20; sc = 556453; } else { uv_lpf = 1; comb = 0x20; sc = 688739; } } else { hactive = 720; hblank = 122; vactive = 487; luma_lpf = 1; uv_lpf = 1; if (state->generic_mode) { vblank = 20; vblank656 = 24; } src_decimation = 0x21f; if (std == V4L2_STD_PAL_60) { if (!state->generic_mode) { vblank = 26; vblank656 = 26; burst = 0x5b; } else { burst = 0x59; } luma_lpf = 2; comb = 0x20; sc = 688739; } else if (std == V4L2_STD_PAL_M) { vblank = 20; vblank656 = 24; burst = 0x61; comb = 0x20; sc = 555452; } else { if (!state->generic_mode) { vblank = 26; vblank656 = 26; } burst = 0x5b; comb = 0x66; sc = 556063; } } /* DEBUG: Displays configured PLL frequency */ if (!is_cx231xx(state)) { pll_int = cx25840_read(client, 0x108); pll_frac = cx25840_read4(client, 0x10c) & 0x1ffffff; pll_post = cx25840_read(client, 0x109); v4l_dbg(1, cx25840_debug, client, "PLL regs = int: %u, frac: %u, post: %u\n", pll_int, pll_frac, pll_post); if (pll_post) { int fin, fsc; int pll = (28636363L * ((((u64)pll_int) << 25L) + pll_frac)) >> 25L; pll /= pll_post; v4l_dbg(1, cx25840_debug, client, "PLL = %d.%06d MHz\n", pll / 1000000, pll % 1000000); v4l_dbg(1, cx25840_debug, client, "PLL/8 = %d.%06d MHz\n", pll / 8000000, (pll / 8) % 1000000); fin = ((u64)src_decimation * pll) >> 12; v4l_dbg(1, cx25840_debug, client, "ADC Sampling freq = %d.%06d MHz\n", fin / 1000000, fin % 1000000); fsc = (((u64)sc) * pll) >> 24L; v4l_dbg(1, cx25840_debug, client, "Chroma sub-carrier freq = %d.%06d MHz\n", fsc / 1000000, fsc % 1000000); v4l_dbg(1, cx25840_debug, client, "hblank %i, hactive %i, vblank %i, vactive %i, vblank656 %i, src_dec %i, burst 0x%02x, luma_lpf %i, uv_lpf %i, comb 0x%02x, sc 0x%06x\n", hblank, hactive, vblank, vactive, vblank656, src_decimation, burst, luma_lpf, uv_lpf, comb, sc); } } /* Sets horizontal blanking delay and active lines */ cx25840_write(client, 0x470, hblank); cx25840_write(client, 0x471, (((hblank >> 8) & 0x3) | (hactive << 4)) & 0xff); cx25840_write(client, 0x472, hactive >> 4); /* Sets burst gate delay */ cx25840_write(client, 0x473, burst); /* Sets vertical blanking delay and active duration */ cx25840_write(client, 0x474, vblank); cx25840_write(client, 0x475, (((vblank >> 8) & 0x3) | (vactive << 4)) & 0xff); cx25840_write(client, 0x476, vactive >> 4); cx25840_write(client, 0x477, vblank656); /* Sets src decimation rate */ cx25840_write(client, 0x478, src_decimation & 0xff); cx25840_write(client, 0x479, (src_decimation >> 8) & 0xff); /* Sets Luma and UV Low pass filters */ cx25840_write(client, 0x47a, luma_lpf << 6 | ((uv_lpf << 4) & 0x30)); /* Enables comb filters */ cx25840_write(client, 0x47b, comb); /* Sets SC Step*/ cx25840_write(client, 0x47c, sc); cx25840_write(client, 0x47d, (sc >> 8) & 0xff); cx25840_write(client, 0x47e, (sc >> 16) & 0xff); /* Sets VBI parameters */ if (std & V4L2_STD_625_50) { cx25840_write(client, 0x47f, 0x01); state->vbi_line_offset = 5; } else { cx25840_write(client, 0x47f, 0x00); state->vbi_line_offset = 8; } } /* ----------------------------------------------------------------------- */ static void input_change(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; /* Follow step 8c and 8d of section 3.16 in the cx25840 datasheet */ if (std & V4L2_STD_SECAM) { cx25840_write(client, 0x402, 0); } else { cx25840_write(client, 0x402, 0x04); cx25840_write(client, 0x49f, (std & V4L2_STD_NTSC) ? 0x14 : 0x11); } cx25840_and_or(client, 0x401, ~0x60, 0); cx25840_and_or(client, 0x401, ~0x60, 0x60); /* Don't write into audio registers on cx2583x chips */ if (is_cx2583x(state)) return; cx25840_and_or(client, 0x810, ~0x01, 1); if (state->radio) { cx25840_write(client, 0x808, 0xf9); cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_525_60) { /* * Certain Hauppauge PVR150 models have a hardware bug * that causes audio to drop out. For these models the * audio standard must be set explicitly. * To be precise: it affects cards with tuner models * 85, 99 and 112 (model numbers from tveeprom). */ int hw_fix = state->pvr150_workaround; if (std == V4L2_STD_NTSC_M_JP) { /* Japan uses EIAJ audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x2f : 0xf7); } else if (std == V4L2_STD_NTSC_M_KR) { /* South Korea uses A2 audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x3f : 0xf8); } else { /* Others use the BTSC audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x1f : 0xf6); } cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_PAL) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * Since system PAL-L is pretty much non-existent and * not used by any public broadcast network, force * 6.5 MHz carrier to be interpreted as System DK, * this avoids DK audio detection instability */ cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_SECAM) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * If only one of SECAM-DK / SECAM-L is required, then force * 6.5MHz carrier, else autodetect it */ if ((std & V4L2_STD_SECAM_DK) && !(std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System DK */ cx25840_write(client, 0x80b, 0x00); } else if (!(std & V4L2_STD_SECAM_DK) && (std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System L */ cx25840_write(client, 0x80b, 0x08); } else { /* 6.5 MHz carrier to be autodetected */ cx25840_write(client, 0x80b, 0x10); } } cx25840_and_or(client, 0x810, ~0x01, 0); } static int set_input(struct i2c_client *client, enum cx25840_video_input vid_input, enum cx25840_audio_input aud_input) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 is_composite = (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8); u8 is_component = (vid_input & CX25840_COMPONENT_ON) == CX25840_COMPONENT_ON; u8 is_dif = (vid_input & CX25840_DIF_ON) == CX25840_DIF_ON; u8 is_svideo = (vid_input & CX25840_SVIDEO_ON) == CX25840_SVIDEO_ON; int luma = vid_input & 0xf0; int chroma = vid_input & 0xf00; u8 reg; u32 val; v4l_dbg(1, cx25840_debug, client, "decoder set video input %d, audio input %d\n", vid_input, aud_input); if (vid_input >= CX25840_VIN1_CH1) { v4l_dbg(1, cx25840_debug, client, "vid_input 0x%x\n", vid_input); reg = vid_input & 0xff; is_composite = !is_component && ((vid_input & CX25840_SVIDEO_ON) != CX25840_SVIDEO_ON); v4l_dbg(1, cx25840_debug, client, "mux cfg 0x%x comp=%d\n", reg, is_composite); } else if (is_composite) { reg = 0xf0 + (vid_input - CX25840_COMPOSITE1); } else { if ((vid_input & ~0xff0) || luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA8 || chroma < CX25840_SVIDEO_CHROMA4 || chroma > CX25840_SVIDEO_CHROMA8) { v4l_err(client, "0x%04x is not a valid video input!\n", vid_input); return -EINVAL; } reg = 0xf0 + ((luma - CX25840_SVIDEO_LUMA1) >> 4); if (chroma >= CX25840_SVIDEO_CHROMA7) { reg &= 0x3f; reg |= (chroma - CX25840_SVIDEO_CHROMA7) >> 2; } else { reg &= 0xcf; reg |= (chroma - CX25840_SVIDEO_CHROMA4) >> 4; } } /* The caller has previously prepared the correct routing * configuration in reg (for the cx23885) so we have no * need to attempt to flip bits for earlier av decoders. */ if (!is_cx2388x(state) && !is_cx231xx(state)) { switch (aud_input) { case CX25840_AUDIO_SERIAL: /* do nothing, use serial audio input */ break; case CX25840_AUDIO4: reg &= ~0x30; break; case CX25840_AUDIO5: reg &= ~0x30; reg |= 0x10; break; case CX25840_AUDIO6: reg &= ~0x30; reg |= 0x20; break; case CX25840_AUDIO7: reg &= ~0xc0; break; case CX25840_AUDIO8: reg &= ~0xc0; reg |= 0x40; break; default: v4l_err(client, "0x%04x is not a valid audio input!\n", aud_input); return -EINVAL; } } cx25840_write(client, 0x103, reg); /* Set INPUT_MODE to Composite, S-Video or Component */ if (is_component) cx25840_and_or(client, 0x401, ~0x6, 0x6); else cx25840_and_or(client, 0x401, ~0x6, is_composite ? 0 : 0x02); if (is_cx2388x(state)) { /* Enable or disable the DIF for tuner use */ if (is_dif) { cx25840_and_or(client, 0x102, ~0x80, 0x80); /* Set of defaults for NTSC and PAL */ cx25840_write4(client, 0x31c, 0xc2262600); cx25840_write4(client, 0x320, 0xc2262600); /* 18271 IF - Nobody else yet uses a different * tuner with the DIF, so these are reasonable * assumptions (HVR1250 and HVR1850 specific). */ cx25840_write4(client, 0x318, 0xda262600); cx25840_write4(client, 0x33c, 0x2a24c800); cx25840_write4(client, 0x104, 0x0704dd00); } else { cx25840_write4(client, 0x300, 0x015c28f5); cx25840_and_or(client, 0x102, ~0x80, 0); cx25840_write4(client, 0x340, 0xdf7df83); cx25840_write4(client, 0x104, 0x0704dd80); cx25840_write4(client, 0x314, 0x22400600); cx25840_write4(client, 0x318, 0x40002600); cx25840_write4(client, 0x324, 0x40002600); cx25840_write4(client, 0x32c, 0x0250e620); cx25840_write4(client, 0x39c, 0x01FF0B00); cx25840_write4(client, 0x410, 0xffff0dbf); cx25840_write4(client, 0x414, 0x00137d03); if (is_cx23888(state)) { /* 888 MISC_TIM_CTRL */ cx25840_write4(client, 0x42c, 0x42600000); /* 888 FIELD_COUNT */ cx25840_write4(client, 0x430, 0x0000039b); /* 888 VSCALE_CTRL */ cx25840_write4(client, 0x438, 0x00000000); /* 888 DFE_CTRL1 */ cx25840_write4(client, 0x440, 0xF8E3E824); /* 888 DFE_CTRL2 */ cx25840_write4(client, 0x444, 0x401040dc); /* 888 DFE_CTRL3 */ cx25840_write4(client, 0x448, 0xcd3f02a0); /* 888 PLL_CTRL */ cx25840_write4(client, 0x44c, 0x161f1000); /* 888 HTL_CTRL */ cx25840_write4(client, 0x450, 0x00000802); } cx25840_write4(client, 0x91c, 0x01000000); cx25840_write4(client, 0x8e0, 0x03063870); cx25840_write4(client, 0x8d4, 0x7FFF0024); cx25840_write4(client, 0x8d0, 0x00063073); cx25840_write4(client, 0x8c8, 0x00010000); cx25840_write4(client, 0x8cc, 0x00080023); /* DIF BYPASS */ cx25840_write4(client, 0x33c, 0x2a04c800); } /* Reset the DIF */ cx25840_write4(client, 0x398, 0); } if (!is_cx2388x(state) && !is_cx231xx(state)) { /* Set CH_SEL_ADC2 to 1 if input comes from CH3 */ cx25840_and_or(client, 0x102, ~0x2, (reg & 0x80) == 0 ? 2 : 0); /* Set DUAL_MODE_ADC2 to 1 if input comes from both CH2&CH3 */ if ((reg & 0xc0) != 0xc0 && (reg & 0x30) != 0x30) cx25840_and_or(client, 0x102, ~0x4, 4); else cx25840_and_or(client, 0x102, ~0x4, 0); } else { /* Set DUAL_MODE_ADC2 to 1 if component*/ cx25840_and_or(client, 0x102, ~0x4, is_component ? 0x4 : 0x0); if (is_composite) { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } else if (!is_component) { /* S-Video */ if (chroma >= CX25840_SVIDEO_CHROMA7) { /* ADC2 input select channel 3 */ cx25840_and_or(client, 0x102, ~0x2, 2); } else { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } } /* cx23885 / SVIDEO */ if (is_cx2388x(state) && is_svideo) { #define AFE_CTRL (0x104) #define MODE_CTRL (0x400) cx25840_and_or(client, 0x102, ~0x2, 0x2); val = cx25840_read4(client, MODE_CTRL); val &= 0xFFFFF9FF; /* YC */ val |= 0x00000200; val &= ~0x2000; cx25840_write4(client, MODE_CTRL, val); val = cx25840_read4(client, AFE_CTRL); /* Chroma in select */ val |= 0x00001000; val &= 0xfffffe7f; /* Clear VGA_SEL_CH2 and VGA_SEL_CH3 (bits 7 and 8). * This sets them to use video rather than audio. * Only one of the two will be in use. */ cx25840_write4(client, AFE_CTRL, val); } else { cx25840_and_or(client, 0x102, ~0x2, 0); } } state->vid_input = vid_input; state->aud_input = aud_input; cx25840_audio_set_path(client); input_change(client); if (is_cx2388x(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx25840_write(client, 0x914, 0xa0); /* I2S_OUT_CTL: * I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 * I2S_OUT_MASTER_MODE = Master */ cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); } else if (is_cx231xx(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx