// SPDX-License-Identifier: GPL-2.0
/*
 * SHA-1 and HMAC-SHA1 library functions
 */

#include <crypto/hmac.h>
#include <crypto/sha1.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/unaligned.h>
#include <linux/wordpart.h>
#include "fips.h"

static const struct sha1_block_state sha1_iv = {
	.h = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};

/*
 * If you have 32 registers or more, the compiler can (and should)
 * try to change the array[] accesses into registers. However, on
 * machines with fewer than ~25 registers, that won't really work,
 * and at least gcc will make an unholy mess of it.
 *
 * So to avoid that mess which just slows things down, we force
 * the stores to memory to actually happen (we might be better off
 * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
 * suggested by Artur Skawina - that will also make gcc unable to
 * try to do the silly "optimize away loads" part because it won't
 * see what the value will be).
 *
 * Ben Herrenschmidt reports that on PPC, the C version comes close
 * to the optimized asm with this (ie on PPC you don't want that
 * 'volatile', since there are lots of registers).
 *
 * On ARM we get the best code generation by forcing a full memory barrier
 * between each SHA_ROUND, otherwise gcc happily gets wild with spilling,
 * the stack frame size simply explodes and performance goes down the drain.
 */

#ifdef CONFIG_X86
  #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
#elif defined(CONFIG_ARM)
  #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
#else
  #define setW(x, val) (W(x) = (val))
#endif

/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])

/*
 * Where do we get the source from? The first 16 iterations get it from
 * the input data, the next mix it from the 512-bit array.
 */
#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)

#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
	__u32 TEMP = input(t); setW(t, TEMP); \
	E += TEMP + rol32(A,5) + (fn) + (constant); \
	B = ror32(B, 2); \
	TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)

#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D), 0x5a827999, A, B, C, D, E)
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D), 0x5a827999, A, B, C, D, E)
#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D), 0x6ed9eba1, A, B, C, D, E)
#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))), 0x8f1bbcdc, A, B, C, D, E)
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D), 0xca62c1d6, A, B, C, D, E)

/**
 * sha1_transform - single block SHA1 transform (deprecated)
 *
 * @digest: 160 bit digest to update
 * @data:   512 bits of data to hash
 * @array:  16 words of workspace (see note)
 *
 * This function executes SHA-1's internal compression function.  It updates
 * the 160-bit internal state (@digest) with a single 512-bit data block
 * (@data).
 *
 * Don't use this function.  SHA-1 is no longer considered secure.  And even if
 * you do have to use SHA-1, this isn't the correct way to hash something with
 * SHA-1 as this doesn't handle padding and finalization.
 *
 * Note: If the hash is security sensitive, the caller should be sure
 * to clear the workspace.  This is left to the caller to avoid
 * unnecessary clears between chained hashing operations.
 */
void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
	__u32 A, B, C, D, E;
	unsigned int i = 0;

	A = digest[0];
	B = digest[1];
	C = digest[2];
	D = digest[3];
	E = digest[4];

	/* Round 1 - iterations 0-15 take their input from 'data' */
	for (; i < 16; ++i)
		T_0_15(i, A, B, C, D, E);
	/* Round 1 - tail.  Input from 512-bit mixing array */
	for (; i < 20; ++i)
		T_16_19(i, A, B, C, D, E);

	/* Round 2 */
	for (; i < 40; ++i)
		T_20_39(i, A, B, C, D, E);

	/* Round 3 */
	for (; i < 60; ++i)
		T_40_59(i, A, B, C, D, E);

	/* Round 4 */
	for (; i < 80; ++i)
		T_60_79(i, A, B, C, D, E);

	digest[0] += A;
	digest[1] += B;
	digest[2] += C;
	digest[3] += D;
	digest[4] += E;
}
EXPORT_SYMBOL(sha1_transform);

/**
 * sha1_init_raw - initialize the vectors for a SHA1 digest
 * @buf: vector to initialize
 */
void sha1_init_raw(__u32 *buf)
{
	buf[0] = 0x67452301;
	buf[1] = 0xefcdab89;
	buf[2] = 0x98badcfe;
	buf[3] = 0x10325476;
	buf[4] = 0xc3d2e1f0;
}
EXPORT_SYMBOL(sha1_init_raw);

static void __maybe_unused sha1_blocks_generic(struct sha1_block_state *state,
					       const u8 *data, size_t nblocks)
{
	u32 workspace[SHA1_WORKSPACE_WORDS];

	do {
		sha1_transform(state->h, data, workspace);
		data += SHA1_BLOCK_SIZE;
	} while (--nblocks);

	memzero_explicit(workspace, sizeof(workspace));
}

#ifdef CONFIG_CRYPTO_LIB_SHA1_ARCH
#include "sha1.h" /* $(SRCARCH)/sha1.h */
#else
#define sha1_blocks sha1_blocks_generic
#endif

void sha1_init(struct sha1_ctx *ctx)
{
	ctx->state = sha1_iv;
	ctx->bytecount = 0;
}
EXPORT_SYMBOL_GPL(sha1_init);

void sha1_update(struct sha1_ctx *ctx, const u8 *data, size_t len)
{
	size_t partial = ctx->bytecount % SHA1_BLOCK_SIZE;

	ctx->bytecount += len;

	if (partial + len >= SHA1_BLOCK_SIZE) {
		size_t nblocks;

		if (partial) {
			size_t l = SHA1_BLOCK_SIZE - partial;

			memcpy(&ctx->buf[partial], data, l);
			data += l;
			len -= l;

			sha1_blocks(&ctx->state, ctx->buf, 1);
		}

		nblocks = len / SHA1_BLOCK_SIZE;
		len %= SHA1_BLOCK_SIZE;

		if (nblocks) {
			sha1_blocks(&ctx->state, data, nblocks);
			data += nblocks * SHA1_BLOCK_SIZE;
		}
		partial = 0;
	}
	if (len)
		memcpy(&ctx->buf[partial], data, len);
}
EXPORT_SYMBOL_GPL(sha1_update);

static void __sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE])
{
	u64 bitcount = ctx->bytecount << 3;
	size_t partial = ctx->bytecount % SHA1_BLOCK_SIZE;

	/* Append the 0x80 terminator, then zero-pad up to the length field. */
	ctx->buf[partial++] = 0x80;
	if (partial > SHA1_BLOCK_SIZE - 8) {
		memset(&ctx->buf[partial], 0, SHA1_BLOCK_SIZE - partial);
		sha1_blocks(&ctx->state, ctx->buf, 1);
		partial = 0;
	}
	memset(&ctx->buf[partial], 0, SHA1_BLOCK_SIZE - 8 - partial);
	*(__be64 *)&ctx->buf[SHA1_BLOCK_SIZE - 8] = cpu_to_be64(bitcount);
	sha1_blocks(&ctx->state, ctx->buf, 1);

	for (size_t i = 0; i < SHA1_DIGEST_SIZE; i += 4)
		put_unaligned_be32(ctx->state.h[i / 4], out + i);
}

void sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE])
{
	__sha1_final(ctx, out);
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(sha1_final);

void sha1(const u8 *data, size_t len, u8 out[SHA1_DIGEST_SIZE])
{
	struct sha1_ctx ctx;

	sha1_init(&ctx);
	sha1_update(&ctx, data, len);
	sha1_final(&ctx, out);
}
EXPORT_SYMBOL_GPL(sha1);

static void __hmac_sha1_preparekey(struct sha1_block_state *istate,
				   struct sha1_block_state *ostate,
				   const u8 *raw_key, size_t raw_key_len)
{
	union {
		u8 b[SHA1_BLOCK_SIZE];
		unsigned long w[SHA1_BLOCK_SIZE / sizeof(unsigned long)];
	} derived_key = { 0 };

	if (unlikely(raw_key_len > SHA1_BLOCK_SIZE))
		sha1(raw_key, raw_key_len, derived_key.b);
	else
		memcpy(derived_key.b, raw_key, raw_key_len);

	for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++)
		derived_key.w[i] ^= REPEAT_BYTE(HMAC_IPAD_VALUE);
	*istate = sha1_iv;
	sha1_blocks(istate, derived_key.b, 1);

	for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++)
		derived_key.w[i] ^= REPEAT_BYTE(HMAC_OPAD_VALUE ^ HMAC_IPAD_VALUE);
	*ostate = sha1_iv;
	sha1_blocks(ostate, derived_key.b, 1);

	memzero_explicit(&derived_key, sizeof(derived_key));
}
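/*
 * Per RFC 2104, HMAC(K, m) = H((K' ^ opad) || H((K' ^ ipad) || m)), where K'
 * is the key zero-padded (or first hashed, if longer than a block) to
 * SHA1_BLOCK_SIZE bytes.  __hmac_sha1_preparekey() caches the compression
 * states after absorbing the one-block (K' ^ ipad) and (K' ^ opad) prefixes,
 * so keyed contexts can skip that work for every message.  Note the second
 * XOR loop applies (ipad ^ opad) on top of the already-ipad-XORed key, which
 * leaves exactly (K' ^ opad).
 */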
void hmac_sha1_preparekey(struct hmac_sha1_key *key,
			  const u8 *raw_key, size_t raw_key_len)
{
	__hmac_sha1_preparekey(&key->istate, &key->ostate,
			       raw_key, raw_key_len);
}
EXPORT_SYMBOL_GPL(hmac_sha1_preparekey);

void hmac_sha1_init(struct hmac_sha1_ctx *ctx, const struct hmac_sha1_key *key)
{
	ctx->sha_ctx.state = key->istate;
	ctx->sha_ctx.bytecount = SHA1_BLOCK_SIZE;
	ctx->ostate = key->ostate;
}
EXPORT_SYMBOL_GPL(hmac_sha1_init);

void hmac_sha1_init_usingrawkey(struct hmac_sha1_ctx *ctx,
				const u8 *raw_key, size_t raw_key_len)
{
	__hmac_sha1_preparekey(&ctx->sha_ctx.state, &ctx->ostate,
			       raw_key, raw_key_len);
	ctx->sha_ctx.bytecount = SHA1_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(hmac_sha1_init_usingrawkey);

void hmac_sha1_final(struct hmac_sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE])
{
	/* Generate the padded input for the outer hash in ctx->sha_ctx.buf. */
	__sha1_final(&ctx->sha_ctx, ctx->sha_ctx.buf);
	memset(&ctx->sha_ctx.buf[SHA1_DIGEST_SIZE], 0,
	       SHA1_BLOCK_SIZE - SHA1_DIGEST_SIZE);
	ctx->sha_ctx.buf[SHA1_DIGEST_SIZE] = 0x80;
	*(__be32 *)&ctx->sha_ctx.buf[SHA1_BLOCK_SIZE - 4] =
		cpu_to_be32(8 * (SHA1_BLOCK_SIZE + SHA1_DIGEST_SIZE));

	/* Compute the outer hash, which gives the HMAC value. */
	sha1_blocks(&ctx->ostate, ctx->sha_ctx.buf, 1);
	for (size_t i = 0; i < SHA1_DIGEST_SIZE; i += 4)
		put_unaligned_be32(ctx->ostate.h[i / 4], out + i);

	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(hmac_sha1_final);

void hmac_sha1(const struct hmac_sha1_key *key,
	       const u8 *data, size_t data_len, u8 out[SHA1_DIGEST_SIZE])
{
	struct hmac_sha1_ctx ctx;

	hmac_sha1_init(&ctx, key);
	hmac_sha1_update(&ctx, data, data_len);
	hmac_sha1_final(&ctx, out);
}
EXPORT_SYMBOL_GPL(hmac_sha1);

void hmac_sha1_usingrawkey(const u8 *raw_key, size_t raw_key_len,
			   const u8 *data, size_t data_len,
			   u8 out[SHA1_DIGEST_SIZE])
{
	struct hmac_sha1_ctx ctx;

	hmac_sha1_init_usingrawkey(&ctx, raw_key, raw_key_len);
	hmac_sha1_update(&ctx, data, data_len);
	hmac_sha1_final(&ctx, out);
}
EXPORT_SYMBOL_GPL(hmac_sha1_usingrawkey);

#if defined(sha1_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS)
static int __init sha1_mod_init(void)
{
#ifdef sha1_mod_init_arch
	sha1_mod_init_arch();
#endif
	if (fips_enabled) {
		/*
		 * FIPS cryptographic algorithm self-test.  As per the FIPS
		 * Implementation Guidance, testing HMAC-SHA1 satisfies the
		 * test requirement for SHA-1 too.
		 */
		u8 mac[SHA1_DIGEST_SIZE];

		hmac_sha1_usingrawkey(fips_test_key, sizeof(fips_test_key),
				      fips_test_data, sizeof(fips_test_data),
				      mac);
		if (memcmp(fips_test_hmac_sha1_value, mac, sizeof(mac)) != 0)
			panic("sha1: FIPS self-test failed\n");
	}
	return 0;
}
subsys_initcall(sha1_mod_init);

static void __exit sha1_mod_exit(void)
{
}
module_exit(sha1_mod_exit);
#endif

MODULE_DESCRIPTION("SHA-1 and HMAC-SHA1 library functions");
MODULE_LICENSE("GPL");
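/*
 * Illustrative usage sketch (not part of this file): computing an HMAC-SHA1
 * with the library API above, both one-shot and with a prepared key.  The
 * key and message values are made up for the example.
 */
#if 0
static void hmac_sha1_example(void)
{
	static const u8 key[] = "example key";	/* hypothetical key */
	static const u8 msg[] = "example msg";	/* hypothetical message */
	struct hmac_sha1_key prepared;
	struct hmac_sha1_ctx ctx;
	u8 mac[SHA1_DIGEST_SIZE];

	/* One-shot interface. */
	hmac_sha1_usingrawkey(key, sizeof(key) - 1, msg, sizeof(msg) - 1, mac);

	/* Prepare the key once, then MAC any number of messages with it. */
	hmac_sha1_preparekey(&prepared, key, sizeof(key) - 1);
	hmac_sha1_init(&ctx, &prepared);
	hmac_sha1_update(&ctx, msg, sizeof(msg) - 1);
	hmac_sha1_final(&ctx, mac);
}
#endif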
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rpm

#if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RUNTIME_POWER_H

#include <linux/ktime.h>
#include <linux/tracepoint.h>

struct device;

/*
 * The rpm_internal events are used for tracing some important
 * runtime pm internal functions.
 */
DECLARE_EVENT_CLASS(rpm_internal,

	TP_PROTO(struct device *dev, int flags),

	TP_ARGS(dev, flags),

	TP_STRUCT__entry(
		__string(name, dev_name(dev))
		__field(int, flags)
		__field(int, usage_count)
		__field(int, disable_depth)
		__field(int, runtime_auto)
		__field(int, request_pending)
		__field(int, irq_safe)
		__field(int, child_count)
	),

	TP_fast_assign(
		__assign_str(name);
		__entry->flags = flags;
		__entry->usage_count = atomic_read(
			&dev->power.usage_count);
		__entry->disable_depth = dev->power.disable_depth;
		__entry->runtime_auto = dev->power.runtime_auto;
		__entry->request_pending = dev->power.request_pending;
		__entry->irq_safe = dev->power.irq_safe;
		__entry->child_count = atomic_read(
			&dev->power.child_count);
	),

	TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d"
			" irq-%-1d child-%d",
			__get_str(name), __entry->flags,
			__entry->usage_count,
			__entry->disable_depth,
			__entry->runtime_auto,
			__entry->request_pending,
			__entry->irq_safe,
			__entry->child_count
		 )
);
DEFINE_EVENT(rpm_internal, rpm_suspend,

	TP_PROTO(struct device *dev, int flags),

	TP_ARGS(dev, flags)
);
DEFINE_EVENT(rpm_internal, rpm_resume,

	TP_PROTO(struct device *dev, int flags),

	TP_ARGS(dev, flags)
);
DEFINE_EVENT(rpm_internal, rpm_idle,

	TP_PROTO(struct device *dev, int flags),

	TP_ARGS(dev, flags)
);
DEFINE_EVENT(rpm_internal, rpm_usage,

	TP_PROTO(struct device *dev, int flags),

	TP_ARGS(dev, flags)
);

TRACE_EVENT(rpm_return_int,
	TP_PROTO(struct device *dev, unsigned long ip, int ret),
	TP_ARGS(dev, ip, ret),

	TP_STRUCT__entry(
		__string(name, dev_name(dev))
		__field(unsigned long, ip)
		__field(int, ret)
	),

	TP_fast_assign(
		__assign_str(name);
		__entry->ip = ip;
		__entry->ret = ret;
	),

	TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name),
		__entry->ret)
);

#define RPM_STATUS_STRINGS \
	EM(RPM_INVALID, "RPM_INVALID") \
	EM(RPM_ACTIVE, "RPM_ACTIVE") \
	EM(RPM_RESUMING, "RPM_RESUMING") \
	EM(RPM_SUSPENDED, "RPM_SUSPENDED") \
	EMe(RPM_SUSPENDING, "RPM_SUSPENDING")

/* Enums require being exported to userspace, for user tool parsing. */
#undef EM
#undef EMe
#define EM(a, b)	TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

RPM_STATUS_STRINGS

/*
 * Now redefine the EM() and EMe() macros to map the enums to the strings that
 * will be printed in the output.
 */
#undef EM
#undef EMe
#define EM(a, b)	{ a, b },
#define EMe(a, b)	{ a, b }

TRACE_EVENT(rpm_status,
	TP_PROTO(struct device *dev, enum rpm_status status),
	TP_ARGS(dev, status),

	TP_STRUCT__entry(
		__string(name, dev_name(dev))
		__field(int, status)
	),

	TP_fast_assign(
		__assign_str(name);
		__entry->status = status;
	),

	TP_printk("%s status=%s", __get_str(name),
		  __print_symbolic(__entry->status, RPM_STATUS_STRINGS))
);

#endif /* _TRACE_RUNTIME_POWER_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
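/*
 * Illustration (not part of this header): after the second round of EM()/EMe()
 * definitions above, RPM_STATUS_STRINGS expands to the initializer list
 *
 *	{ RPM_INVALID, "RPM_INVALID" }, { RPM_ACTIVE, "RPM_ACTIVE" },
 *	{ RPM_RESUMING, "RPM_RESUMING" }, { RPM_SUSPENDED, "RPM_SUSPENDED" },
 *	{ RPM_SUSPENDING, "RPM_SUSPENDING" }
 *
 * which is exactly what __print_symbolic() consumes.  A caller in the runtime
 * PM core then emits the events along the lines of the sketch below.
 */
#if 0
	trace_rpm_suspend(dev, rpmflags);
	trace_rpm_return_int(dev, _THIS_IP_, retval);
#endif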
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "ordered-data.h"
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
#include "messages.h"
#include "super.h"

static struct bio_set btrfs_compressed_bioset;

static const char * const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };

const char *btrfs_compress_type2str(enum btrfs_compression_type type)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
	case BTRFS_COMPRESS_NONE:
		return btrfs_compress_types[type];
	default:
		break;
	}

	return NULL;
}

static inline struct compressed_bio *to_compressed_bio(struct btrfs_bio *bbio)
{
	return container_of(bbio, struct compressed_bio, bbio);
}

static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,
						   u64 start, blk_opf_t op,
						   btrfs_bio_end_io_t end_io)
{
	struct btrfs_bio *bbio;

	bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op,
					  GFP_NOFS, &btrfs_compressed_bioset));
	btrfs_bio_init(bbio, inode, start, end_io, NULL);
	return to_compressed_bio(bbio);
}

bool btrfs_compress_is_valid_type(const char *str, size_t len)
{
	int i;

	for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
		size_t comp_len = strlen(btrfs_compress_types[i]);

		if (len < comp_len)
			continue;

		if (!strncmp(btrfs_compress_types[i], str, comp_len))
			return true;
	}
	return false;
}

static int compression_compress_pages(int type, struct list_head *ws,
				      struct btrfs_inode *inode, u64 start,
				      struct folio **folios,
				      unsigned long *out_folios,
				      unsigned long *total_in,
				      unsigned long *total_out)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_compress_folios(ws, inode, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_LZO:
		return lzo_compress_folios(ws, inode, start, folios,
					   out_folios, total_in, total_out);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_compress_folios(ws, inode, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can happen when compression races with remount setting
		 * it to 'no compress', while the caller doesn't call
		 * inode_need_compress() to check if we really need to
		 * compress.
		 *
		 * Not a big deal, just need to inform the caller that we
		 * haven't allocated any pages yet.
		 */
		*out_folios = 0;
		return -E2BIG;
	}
}

static int compression_decompress_bio(struct list_head *ws,
				      struct compressed_bio *cb)
{
	switch (cb->compress_type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_LZO:
		return lzo_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static int compression_decompress(int type, struct list_head *ws,
				  const u8 *data_in, struct folio *dest_folio,
				  unsigned long dest_pgoff, size_t srclen,
				  size_t destlen)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_decompress(ws, data_in, dest_folio, dest_pgoff,
				       srclen, destlen);
	case BTRFS_COMPRESS_LZO:
		return lzo_decompress(ws, data_in, dest_folio, dest_pgoff,
				      srclen, destlen);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_decompress(ws, data_in, dest_folio, dest_pgoff,
				       srclen, destlen);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{
	for (unsigned int i = 0; i < cb->nr_folios; i++)
		btrfs_free_compr_folio(cb->compressed_folios[i]);
	kfree(cb->compressed_folios);
}

static int btrfs_decompress_bio(struct compressed_bio *cb);

/*
 * Global cache of last unused pages for compression/decompression.
 */
static struct btrfs_compr_pool {
	struct shrinker *shrinker;
	spinlock_t lock;
	struct list_head list;
	int count;
	int thresh;
} compr_pool;

static unsigned long btrfs_compr_pool_count(struct shrinker *sh,
					    struct shrink_control *sc)
{
	int ret;

	/*
	 * We must not read the values more than once if 'ret' gets expanded in
	 * the return statement so we don't accidentally return a negative
	 * number, even if the first condition finds it positive.
	 */
	ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh);

	return ret > 0 ? ret : 0;
}

static unsigned long btrfs_compr_pool_scan(struct shrinker *sh,
					   struct shrink_control *sc)
{
	LIST_HEAD(remove);
	struct list_head *tmp, *next;
	int freed;

	if (compr_pool.count == 0)
		return SHRINK_STOP;

	/* For now, just simply drain the whole list. */
	spin_lock(&compr_pool.lock);
	list_splice_init(&compr_pool.list, &remove);
	freed = compr_pool.count;
	compr_pool.count = 0;
	spin_unlock(&compr_pool.lock);

	list_for_each_safe(tmp, next, &remove) {
		struct page *page = list_entry(tmp, struct page, lru);

		ASSERT(page_ref_count(page) == 1);
		put_page(page);
	}

	return freed;
}

/*
 * Common wrappers for page allocation from compression wrappers
 */
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info)
{
	struct folio *folio = NULL;

	/* For bs > ps cases, no cached folio pool for now. */
	if (fs_info->block_min_order)
		goto alloc;

	spin_lock(&compr_pool.lock);
	if (compr_pool.count > 0) {
		folio = list_first_entry(&compr_pool.list, struct folio, lru);
		list_del_init(&folio->lru);
		compr_pool.count--;
	}
	spin_unlock(&compr_pool.lock);

	if (folio)
		return folio;
alloc:
	return folio_alloc(GFP_NOFS, fs_info->block_min_order);
}

void btrfs_free_compr_folio(struct folio *folio)
{
	bool do_free = false;
	/* The folio is from a bs > ps fs, no cached pool for now. */
	if (folio_order(folio))
		goto free;

	spin_lock(&compr_pool.lock);
	if (compr_pool.count > compr_pool.thresh) {
		do_free = true;
	} else {
		list_add(&folio->lru, &compr_pool.list);
		compr_pool.count++;
	}
	spin_unlock(&compr_pool.lock);

	if (!do_free)
		return;

free:
	ASSERT(folio_ref_count(folio) == 1);
	folio_put(folio);
}

static void end_bbio_compressed_read(struct btrfs_bio *bbio)
{
	struct compressed_bio *cb = to_compressed_bio(bbio);
	blk_status_t status = bbio->bio.bi_status;

	if (!status)
		status = errno_to_blk_status(btrfs_decompress_bio(cb));

	btrfs_free_compressed_folios(cb);
	btrfs_bio_end_io(cb->orig_bbio, status);
	bio_put(&bbio->bio);
}

/*
 * Clear the writeback bits on all of the file pages for a compressed write.
 */
static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{
	struct inode *inode = &cb->bbio.inode->vfs_inode;
	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
	pgoff_t index = cb->start >> PAGE_SHIFT;
	const pgoff_t end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
	struct folio_batch fbatch;
	int i;
	int ret;

	ret = blk_status_to_errno(cb->bbio.bio.bi_status);
	if (ret)
		mapping_set_error(inode->i_mapping, ret);

	folio_batch_init(&fbatch);
	while (index <= end_index) {
		ret = filemap_get_folios(inode->i_mapping, &index, end_index,
					 &fbatch);

		if (ret == 0)
			return;

		for (i = 0; i < ret; i++) {
			struct folio *folio = fbatch.folios[i];

			btrfs_folio_clamp_clear_writeback(fs_info, folio,
							  cb->start, cb->len);
		}
		folio_batch_release(&fbatch);
	}
	/* The inode may be gone now. */
}

/*
 * Do the cleanup once all the compressed pages hit the disk.  This will clear
 * writeback on the file pages and free the compressed pages.
 *
 * This also calls the writeback end hooks for the file pages so that metadata
 * and checksums can be updated in the file.
 */
static void end_bbio_compressed_write(struct btrfs_bio *bbio)
{
	struct compressed_bio *cb = to_compressed_bio(bbio);

	btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
				    cb->bbio.bio.bi_status == BLK_STS_OK);

	if (cb->writeback)
		end_compressed_writeback(cb);
	/* Note, our inode could be gone now. */

	btrfs_free_compressed_folios(cb);
	bio_put(&cb->bbio.bio);
}

static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
	struct bio *bio = &cb->bbio.bio;
	u32 offset = 0;
	unsigned int findex = 0;

	while (offset < cb->compressed_len) {
		struct folio *folio = cb->compressed_folios[findex];
		u32 len = min_t(u32, cb->compressed_len - offset,
				folio_size(folio));
		int ret;

		/* Maximum compressed extent is smaller than bio size limit. */
		ret = bio_add_folio(bio, folio, len, 0);
		ASSERT(ret);
		offset += len;
		findex++;
	}
}

/*
 * Worker function to build and submit bios for previously compressed pages.
 * The corresponding pages in the inode should be marked for writeback
 * and the compressed pages should have a reference on them for dropping
 * when the IO is complete.
 *
 * This also checksums the file bytes and gets things ready for
 * the end io hooks.
 */
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
				   struct folio **compressed_folios,
				   unsigned int nr_folios,
				   blk_opf_t write_flags,
				   bool writeback)
{
	struct btrfs_inode *inode = ordered->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct compressed_bio *cb;

	ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize));
	ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize));

	cb = alloc_compressed_bio(inode, ordered->file_offset,
				  REQ_OP_WRITE | write_flags,
				  end_bbio_compressed_write);
	cb->start = ordered->file_offset;
	cb->len = ordered->num_bytes;
	cb->compressed_folios = compressed_folios;
	cb->compressed_len = ordered->disk_num_bytes;
	cb->writeback = writeback;
	cb->nr_folios = nr_folios;
	cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
	cb->bbio.ordered = ordered;
	btrfs_add_compressed_bio_folios(cb);

	btrfs_submit_bbio(&cb->bbio, 0);
}

/*
 * Add extra pages in the same compressed file extent so that we don't need to
 * re-read the same extent again and again.
 *
 * NOTE: this won't work well for subpage, as for subpage read, we lock the
 * full page then submit a bio for each compressed/regular extent.
 *
 * This means, if we have several sectors in the same page pointing to the same
 * on-disk compressed data, we will re-read the same extent many times and
 * this function can only help for the next page.
 */
static noinline int add_ra_bio_pages(struct inode *inode,
				     u64 compressed_end,
				     struct compressed_bio *cb,
				     int *memstall, unsigned long *pflags)
{
	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
	pgoff_t end_index;
	struct bio *orig_bio = &cb->orig_bbio->bio;
	u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
	u64 isize = i_size_read(inode);
	int ret;
	struct folio *folio;
	struct extent_map *em;
	struct address_space *mapping = inode->i_mapping;
	struct extent_map_tree *em_tree;
	struct extent_io_tree *tree;
	int sectors_missed = 0;

	em_tree = &BTRFS_I(inode)->extent_tree;
	tree = &BTRFS_I(inode)->io_tree;

	if (isize == 0)
		return 0;

	/*
	 * For current subpage support, we only support 64K page size,
	 * which means maximum compressed extent size (128K) is just 2x page
	 * size.
	 * This makes readahead less effective, so here disable readahead for
	 * subpage for now, until full compressed write is supported.
	 */
	if (fs_info->sectorsize < PAGE_SIZE)
		return 0;

	/* For bs > ps cases, we don't support readahead for compressed folios for now. */
	if (fs_info->block_min_order)
		return 0;

	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	while (cur < compressed_end) {
		pgoff_t page_end;
		pgoff_t pg_index = cur >> PAGE_SHIFT;
		u32 add_size;

		if (pg_index > end_index)
			break;

		folio = filemap_get_folio(mapping, pg_index);
		if (!IS_ERR(folio)) {
			u64 folio_sz = folio_size(folio);
			u64 offset = offset_in_folio(folio, cur);

			folio_put(folio);
			sectors_missed += (folio_sz - offset) >>
					  fs_info->sectorsize_bits;

			/* Beyond threshold, no need to continue. */
			if (sectors_missed > 4)
				break;

			/*
			 * Jump to next page start as we already have a page
			 * for the current offset.
			 */
			cur += (folio_sz - offset);
			continue;
		}

		folio = filemap_alloc_folio(mapping_gfp_constraint(mapping,
								   ~__GFP_FS), 0, NULL);
		if (!folio)
			break;

		if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) {
			/* There is already a page, skip to page end. */
			cur += folio_size(folio);
			folio_put(folio);
			continue;
		}

		if (!*memstall && folio_test_workingset(folio)) {
			psi_memstall_enter(pflags);
			*memstall = 1;
		}

		ret = set_folio_extent_mapped(folio);
		if (ret < 0) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		page_end = (pg_index << PAGE_SHIFT) + folio_size(folio) - 1;
		btrfs_lock_extent(tree, cur, page_end, NULL);
		read_lock(&em_tree->lock);
		em = btrfs_lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
		read_unlock(&em_tree->lock);

		/*
		 * At this point, we have a locked page in the page cache for
		 * these bytes in the file.  But, we have to make sure they map
		 * to this compressed extent on disk.
		 */
		if (!em || cur < em->start ||
		    (cur + fs_info->sectorsize > btrfs_extent_map_end(em)) ||
		    (btrfs_extent_map_block_start(em) >> SECTOR_SHIFT) !=
		    orig_bio->bi_iter.bi_sector) {
			btrfs_free_extent_map(em);
			btrfs_unlock_extent(tree, cur, page_end, NULL);
			folio_unlock(folio);
			folio_put(folio);
			break;
		}
		add_size = min(em->start + em->len, page_end + 1) - cur;
		btrfs_free_extent_map(em);
		btrfs_unlock_extent(tree, cur, page_end, NULL);

		if (folio_contains(folio, end_index)) {
			size_t zero_offset = offset_in_folio(folio, isize);

			if (zero_offset) {
				int zeros;

				zeros = folio_size(folio) - zero_offset;
				folio_zero_range(folio, zero_offset, zeros);
			}
		}

		if (!bio_add_folio(orig_bio, folio, add_size,
				   offset_in_folio(folio, cur))) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}
		/*
		 * If it's subpage, we also need to increase its
		 * subpage::readers number, as at endio we will decrease
		 * subpage::readers to unlock the page.
		 */
		if (fs_info->sectorsize < PAGE_SIZE)
			btrfs_folio_set_lock(fs_info, folio, cur, add_size);
		folio_put(folio);
		cur += add_size;
	}
	return 0;
}

/*
 * For a compressed read, the bio we get passed has all the inode pages
 * in it.  We don't actually do IO on those pages but allocate new ones
 * to hold the compressed pages on disk.
 *
 * bio->bi_iter.bi_sector points to the compressed extent on disk
 * bio->bi_io_vec points to all of the inode pages
 *
 * After the compressed pages are read, we copy the bytes into the
 * bio we were passed and then call the bio end_io calls
 */
void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct compressed_bio *cb;
	unsigned int compressed_len;
	u64 file_offset = bbio->file_offset;
	u64 em_len;
	u64 em_start;
	struct extent_map *em;
	unsigned long pflags;
	int memstall = 0;
	blk_status_t status;
	int ret;

	/* We need the actual starting offset of this extent in the file. */
	read_lock(&em_tree->lock);
	em = btrfs_lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
	read_unlock(&em_tree->lock);
	if (!em) {
		status = BLK_STS_IOERR;
		goto out;
	}

	ASSERT(btrfs_extent_map_is_compressed(em));
	compressed_len = em->disk_num_bytes;

	cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
				  end_bbio_compressed_read);

	cb->start = em->start - em->offset;
	em_len = em->len;
	em_start = em->start;

	cb->len = bbio->bio.bi_iter.bi_size;
	cb->compressed_len = compressed_len;
	cb->compress_type = btrfs_extent_map_compression(em);
	cb->orig_bbio = bbio;
	cb->bbio.csum_search_commit_root = bbio->csum_search_commit_root;

	btrfs_free_extent_map(em);

	cb->nr_folios = DIV_ROUND_UP(compressed_len, btrfs_min_folio_size(fs_info));
	cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS);
	if (!cb->compressed_folios) {
		status = BLK_STS_RESOURCE;
		goto out_free_bio;
	}

	ret = btrfs_alloc_folio_array(cb->nr_folios, fs_info->block_min_order,
				      cb->compressed_folios);
	if (ret) {
		status = BLK_STS_RESOURCE;
		goto out_free_compressed_pages;
	}

	add_ra_bio_pages(&inode->vfs_inode, em_start + em_len, cb, &memstall,
			 &pflags);

	/* Include any pages we added in add_ra_bio_pages(). */
	cb->len = bbio->bio.bi_iter.bi_size;
	cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
	btrfs_add_compressed_bio_folios(cb);

	if (memstall)
		psi_memstall_leave(&pflags);

	btrfs_submit_bbio(&cb->bbio, 0);
	return;

out_free_compressed_pages:
	kfree(cb->compressed_folios);
out_free_bio:
	bio_put(&cb->bbio.bio);
out:
	btrfs_bio_end_io(bbio, status);
}

/*
 * Heuristic uses systematic sampling to collect data from the input data
 * range, the logic can be tuned by the following constants:
 *
 * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample
 * @SAMPLING_INTERVAL  - range from which the sampled data can be collected
 */
#define SAMPLING_READ_SIZE	(16)
#define SAMPLING_INTERVAL	(256)

/*
 * For statistical analysis of the input data we consider bytes that form a
 * Galois Field of 256 objects.  Each object has an attribute count, ie. how
 * many times the object appeared in the sample.
 */
#define BUCKET_SIZE		(256)

/*
 * The size of the sample is based on a statistical sampling rule of thumb.
 * The common way is to perform sampling tests as long as the number of
 * elements in each cell is at least 5.
 *
 * Instead of 5, we choose 32 to obtain more accurate results.
 * If the data contain the maximum number of symbols, which is 256, we obtain a
 * sample size bound by 8192.
 *
 * For a sample of at most 8KB of data per data range: 16 consecutive bytes
 * from up to 512 locations.
 */
#define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED *		\
				 SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
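/*
 * Worked example of the bound above: BTRFS_MAX_UNCOMPRESSED is 128KiB, so
 * there are 131072 / 256 = 512 sampling locations, and the sample is at most
 * 512 * 16 = 8192 bytes, matching the 8KB mentioned in the comment.
 */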
struct bucket_item {
	u32 count;
};

struct heuristic_ws {
	/* Partial copy of input data */
	u8 *sample;
	u32 sample_size;
	/* Buckets store counters for each byte value */
	struct bucket_item *bucket;
	/* Sorting buffer */
	struct bucket_item *bucket_b;
	struct list_head list;
};

static void free_heuristic_ws(struct list_head *ws)
{
	struct heuristic_ws *workspace;

	workspace = list_entry(ws, struct heuristic_ws, list);

	kvfree(workspace->sample);
	kfree(workspace->bucket);
	kfree(workspace->bucket_b);
	kfree(workspace);
}

static struct list_head *alloc_heuristic_ws(struct btrfs_fs_info *fs_info)
{
	struct heuristic_ws *ws;

	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
	if (!ws)
		return ERR_PTR(-ENOMEM);

	ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
	if (!ws->sample)
		goto fail;

	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
	if (!ws->bucket)
		goto fail;

	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
	if (!ws->bucket_b)
		goto fail;

	INIT_LIST_HEAD(&ws->list);
	return &ws->list;
fail:
	free_heuristic_ws(&ws->list);
	return ERR_PTR(-ENOMEM);
}

const struct btrfs_compress_levels btrfs_heuristic_compress = { 0 };

static const struct btrfs_compress_levels * const btrfs_compress_levels[] = {
	/* The heuristic is represented as compression type 0 */
	&btrfs_heuristic_compress,
	&btrfs_zlib_compress,
	&btrfs_lzo_compress,
	&btrfs_zstd_compress,
};

static struct list_head *alloc_workspace(struct btrfs_fs_info *fs_info,
					 int type, int level)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return alloc_heuristic_ws(fs_info);
	case BTRFS_COMPRESS_ZLIB:
		return zlib_alloc_workspace(fs_info, level);
	case BTRFS_COMPRESS_LZO:
		return lzo_alloc_workspace(fs_info);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_alloc_workspace(fs_info, level);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static void free_workspace(int type, struct list_head *ws)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return free_heuristic_ws(ws);
	case BTRFS_COMPRESS_ZLIB:
		return zlib_free_workspace(ws);
	case BTRFS_COMPRESS_LZO:
		return lzo_free_workspace(ws);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_free_workspace(ws);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static int alloc_workspace_manager(struct btrfs_fs_info *fs_info,
				   enum btrfs_compression_type type)
{
	struct workspace_manager *gwsm;
	struct list_head *workspace;

	ASSERT(fs_info->compr_wsm[type] == NULL);
	gwsm = kzalloc(sizeof(*gwsm), GFP_KERNEL);
	if (!gwsm)
		return -ENOMEM;

	INIT_LIST_HEAD(&gwsm->idle_ws);
	spin_lock_init(&gwsm->ws_lock);
	atomic_set(&gwsm->total_ws, 0);
	init_waitqueue_head(&gwsm->ws_wait);
	fs_info->compr_wsm[type] = gwsm;

	/*
	 * Preallocate one workspace for each compression type so we can
	 * guarantee forward progress in the worst case.
	 */
	workspace = alloc_workspace(fs_info, type, 0);
	if (IS_ERR(workspace)) {
		btrfs_warn(fs_info,
	"cannot preallocate compression workspace for %s, will try later",
			   btrfs_compress_type2str(type));
	} else {
		atomic_set(&gwsm->total_ws, 1);
		gwsm->free_ws = 1;
		list_add(workspace, &gwsm->idle_ws);
	}
	return 0;
}

static void free_workspace_manager(struct btrfs_fs_info *fs_info,
				   enum btrfs_compression_type type)
{
	struct list_head *ws;
	struct workspace_manager *gwsm = fs_info->compr_wsm[type];

	/* ZSTD uses its own workspace manager, should not enter here. */
	ASSERT(type != BTRFS_COMPRESS_ZSTD && type < BTRFS_NR_COMPRESS_TYPES);
	if (!gwsm)
		return;
	fs_info->compr_wsm[type] = NULL;
	while (!list_empty(&gwsm->idle_ws)) {
		ws = gwsm->idle_ws.next;
		list_del(ws);
		free_workspace(type, ws);
		atomic_dec(&gwsm->total_ws);
	}
	kfree(gwsm);
}

/*
 * This finds an available workspace or allocates a new one.
 * If it's not possible to allocate a new one, waits until there's one.
 * Preallocation makes a forward progress guarantee and we do not return
 * errors.
 */
struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info,
				      int type, int level)
{
	struct workspace_manager *wsm = fs_info->compr_wsm[type];
	struct list_head *workspace;
	int cpus = num_online_cpus();
	unsigned nofs_flag;
	struct list_head *idle_ws;
	spinlock_t *ws_lock;
	atomic_t *total_ws;
	wait_queue_head_t *ws_wait;
	int *free_ws;

	ASSERT(wsm);
	idle_ws	 = &wsm->idle_ws;
	ws_lock	 = &wsm->ws_lock;
	total_ws = &wsm->total_ws;
	ws_wait	 = &wsm->ws_wait;
	free_ws	 = &wsm->free_ws;

again:
	spin_lock(ws_lock);
	if (!list_empty(idle_ws)) {
		workspace = idle_ws->next;
		list_del(workspace);
		(*free_ws)--;
		spin_unlock(ws_lock);
		return workspace;
	}
	if (atomic_read(total_ws) > cpus) {
		DEFINE_WAIT(wait);

		spin_unlock(ws_lock);
		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(total_ws) > cpus && !*free_ws)
			schedule();
		finish_wait(ws_wait, &wait);
		goto again;
	}
	atomic_inc(total_ws);
	spin_unlock(ws_lock);

	/*
	 * Allocation helpers call vmalloc that can't use GFP_NOFS, so we have
	 * to turn it off here because we might get called from the restricted
	 * context of btrfs_compress_bio/btrfs_compress_pages.
	 */
	nofs_flag = memalloc_nofs_save();
	workspace = alloc_workspace(fs_info, type, level);
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(workspace)) {
		atomic_dec(total_ws);
		wake_up(ws_wait);

		/*
		 * Do not return the error but go back to waiting.  There's a
		 * workspace preallocated for each type and the compression
		 * time is bounded so we get to a workspace eventually.  This
		 * makes our caller's life easier.
		 *
		 * To prevent silent and low-probability deadlocks (when the
		 * initial preallocation fails), check if there are any
		 * workspaces at all.
		 */
		if (atomic_read(total_ws) == 0) {
			static DEFINE_RATELIMIT_STATE(_rs,
					/* once per minute */ 60 * HZ,
					/* no burst */ 1);

			if (__ratelimit(&_rs))
				btrfs_warn(fs_info,
"no compression workspaces, low memory, retrying");
		}
		goto again;
	}
	return workspace;
}

static struct list_head *get_workspace(struct btrfs_fs_info *fs_info,
				       int type, int level)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return btrfs_get_workspace(fs_info, type, level);
	case BTRFS_COMPRESS_ZLIB:
		return zlib_get_workspace(fs_info, level);
	case BTRFS_COMPRESS_LZO:
		return btrfs_get_workspace(fs_info, type, level);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_get_workspace(fs_info, level);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

/*
 * Put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around.
 */
void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type,
			 struct list_head *ws)
{
	struct workspace_manager *gwsm = fs_info->compr_wsm[type];
	struct list_head *idle_ws;
	spinlock_t *ws_lock;
	atomic_t *total_ws;
	wait_queue_head_t *ws_wait;
	int *free_ws;

	ASSERT(gwsm);
	idle_ws	 = &gwsm->idle_ws;
	ws_lock	 = &gwsm->ws_lock;
	total_ws = &gwsm->total_ws;
	ws_wait	 = &gwsm->ws_wait;
	free_ws	 = &gwsm->free_ws;

	spin_lock(ws_lock);
	if (*free_ws <= num_online_cpus()) {
		list_add(ws, idle_ws);
		(*free_ws)++;
		spin_unlock(ws_lock);
		goto wake;
	}
	spin_unlock(ws_lock);

	free_workspace(type, ws);
	atomic_dec(total_ws);
wake:
	cond_wake_up(ws_wait);
}

static void put_workspace(struct btrfs_fs_info *fs_info, int type,
			  struct list_head *ws)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return btrfs_put_workspace(fs_info, type, ws);
	case BTRFS_COMPRESS_ZLIB:
		return btrfs_put_workspace(fs_info, type, ws);
	case BTRFS_COMPRESS_LZO:
		return btrfs_put_workspace(fs_info, type, ws);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_put_workspace(fs_info, ws);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

/*
 * Adjust @level according to the limits of the compression algorithm or
 * fallback to default.
 */
static int btrfs_compress_set_level(unsigned int type, int level)
{
	const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];

	if (level == 0)
		level = levels->default_level;
	else
		level = clamp(level, levels->min_level, levels->max_level);

	return level;
}

/*
 * Check whether the @level is within the valid range for the given type.
 */
bool btrfs_compress_level_valid(unsigned int type, int level)
{
	const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];

	return levels->min_level <= level && level <= levels->max_level;
}

/* Wrapper around find_get_page(), with extra error message. */
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
				     struct folio **in_folio_ret)
{
	struct folio *in_folio;

	/*
	 * The compressed write path should have the folio locked already, thus
	 * we only need to grab one reference.
	 */
	in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT);
	if (IS_ERR(in_folio)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_crit(inode->root->fs_info,
		"failed to get page cache, root %lld ino %llu file offset %llu",
			   btrfs_root_id(inode->root), btrfs_ino(inode), start);
		return -ENOENT;
	}
	*in_folio_ret = in_folio;
	return 0;
}

/*
 * Given an address space and start and length, compress the bytes into @pages
 * that are allocated on demand.
 *
 * @type and @level specify the compression algorithm and its level, where
 * level 0 means whatever default the algorithm chooses and is opaque here
 *
 * @out_folios is an in/out parameter, holds maximum number of folios to allocate
 * and returns number of actually allocated folios
 *
 * @total_in is used to return the number of bytes actually read.  It
 * may be smaller than the input length if we had to exit early because we
 * ran out of room in the folios array or because we cross the
 * max_out threshold.
 *
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 */
int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode,
			  u64 start, struct folio **folios, unsigned long *out_folios,
			  unsigned long *total_in, unsigned long *total_out)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	const unsigned long orig_len = *total_out;
	struct list_head *workspace;
	int ret;

	level = btrfs_compress_set_level(type, level);
	workspace = get_workspace(fs_info, type, level);
	ret = compression_compress_pages(type, workspace, inode, start, folios,
					 out_folios, total_in, total_out);
	/* The total read-in bytes should be no larger than the input. */
	ASSERT(*total_in <= orig_len);
	put_workspace(fs_info, type, workspace);
	return ret;
}

static int btrfs_decompress_bio(struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct list_head *workspace;
	int ret;
	int type = cb->compress_type;

	workspace = get_workspace(fs_info, type, 0);
	ret = compression_decompress_bio(workspace, cb);
	put_workspace(fs_info, type, workspace);

	if (!ret)
		zero_fill_bio(&cb->orig_bbio->bio);
	return ret;
}

/*
 * A less complex decompression routine.  Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * dest_pgoff tells us the offset into the destination folio where we write the
 * decompressed data.
 */
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
	struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio);
	struct list_head *workspace;
	const u32 sectorsize = fs_info->sectorsize;
	int ret;

	/*
	 * The full destination folio range should not exceed the folio size.
	 * And the @destlen should not exceed sectorsize, as this is only called
	 * for inline file extents, which should not exceed sectorsize.
	 */
	ASSERT(dest_pgoff + destlen <= folio_size(dest_folio) && destlen <= sectorsize);

	workspace = get_workspace(fs_info, type, 0);
	ret = compression_decompress(type, workspace, data_in, dest_folio,
				     dest_pgoff, srclen, destlen);
	put_workspace(fs_info, type, workspace);

	return ret;
}

int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info)
{
	int ret;

	ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
	if (ret < 0)
		goto error;
	ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
	if (ret < 0)
		goto error;
	ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
	if (ret < 0)
		goto error;
	ret = zstd_alloc_workspace_manager(fs_info);
	if (ret < 0)
		goto error;
	return 0;
error:
	btrfs_free_compress_wsm(fs_info);
	return ret;
}

void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info)
{
	free_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
	free_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
	free_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
	zstd_free_workspace_manager(fs_info);
}

int __init btrfs_init_compress(void)
{
	if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE,
			offsetof(struct compressed_bio, bbio.bio),
			BIOSET_NEED_BVECS))
		return -ENOMEM;

	compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages");
	if (!compr_pool.shrinker)
		return -ENOMEM;

	spin_lock_init(&compr_pool.lock);
	INIT_LIST_HEAD(&compr_pool.list);
	compr_pool.count = 0;
	/* 128K / 4K = 32, for 8 threads is 256 pages. */
	compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8;
	compr_pool.shrinker->count_objects = btrfs_compr_pool_count;
	compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan;
	compr_pool.shrinker->batch = 32;
	compr_pool.shrinker->seeks = DEFAULT_SEEKS;
	shrinker_register(compr_pool.shrinker);

	return 0;
}

void __cold btrfs_exit_compress(void)
{
	/* For now scan drains all pages and does not touch the parameters. */
	btrfs_compr_pool_scan(NULL, NULL);
	shrinker_free(compr_pool.shrinker);
	bioset_exit(&btrfs_compressed_bioset);
}

/*
 * The bvec is a single page bvec from a bio that contains folios from a filemap.
 *
 * Since the folio may be a large one, and if the bv_page is not a head page of
 * a large folio, then page->index is unreliable.
 *
 * Thus we need this helper to grab the proper file offset.
 */
static u64 file_offset_from_bvec(const struct bio_vec *bvec)
{
	const struct page *page = bvec->bv_page;
	const struct folio *folio = page_folio(page);

	return (page_pgoff(folio, page) << PAGE_SHIFT) + bvec->bv_offset;
}

/*
 * Copy decompressed data from working buffer to pages.
 *
 * @buf:		The decompressed data buffer
 * @buf_len:		The decompressed data length
 * @decompressed:	Number of bytes that are already decompressed inside the
 *			compressed extent
 * @cb:			The compressed extent descriptor
 * @orig_bio:		The original bio that the caller wants to read for
 *
 * An easier to understand graph is like below:
 *
 *		|<- orig_bio ->|     |<- orig_bio ->|
 *	|<-------      full decompressed extent      ----->|
 *	|<-----------    @cb range   ---->|
 *	|			|<-- @buf_len -->|
 *	|<--- @decompressed --->|
 *
 * Note that, @cb can be a subpage of the full decompressed extent, but
 * @cb->start always equals the starting file offset of the full decompressed
 * extent.
 *
 * When reading a compressed extent, we have to read the full compressed
 * extent, while @orig_bio may only want part of the range.
 * Thus this function will ensure only data covered by @orig_bio will be copied
 * to.
 *
 * Return 0 if we have copied all needed contents for @orig_bio.
 * Return >0 if we need to continue decompressing.
 */
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
			      struct compressed_bio *cb, u32 decompressed)
{
	struct bio *orig_bio = &cb->orig_bbio->bio;
	/* Offset inside the full decompressed extent */
	u32 cur_offset;

	cur_offset = decompressed;
	/* The main loop to do the copy */
	while (cur_offset < decompressed + buf_len) {
		struct bio_vec bvec;
		size_t copy_len;
		u32 copy_start;
		/* Offset inside the full decompressed extent */
		u32 bvec_offset;
		void *kaddr;

		bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);

		/*
		 * cb->start may underflow, but subtracting that value can still
		 * give us correct offset inside the full decompressed extent.
		 */
		bvec_offset = file_offset_from_bvec(&bvec) - cb->start;

		/* Haven't reached the bvec range, exit */
		if (decompressed + buf_len <= bvec_offset)
			return 1;

		copy_start = max(cur_offset, bvec_offset);
		copy_len = min(bvec_offset + bvec.bv_len,
			       decompressed + buf_len) - copy_start;
		ASSERT(copy_len);

		/*
		 * Extra range check to ensure we didn't go beyond
		 * @buf + @buf_len.
		 */
		ASSERT(copy_start - decompressed < buf_len);
		kaddr = bvec_kmap_local(&bvec);
		memcpy(kaddr, buf + copy_start - decompressed, copy_len);
		kunmap_local(kaddr);

		cur_offset += copy_len;

		bio_advance(orig_bio, copy_len);
		/* Finished the bio */
		if (!orig_bio->bi_iter.bi_size)
			return 0;
	}
	return 1;
}
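/*
 * Worked example for the helper above (made-up numbers): with a decompressed
 * extent of 16K delivered in @buf_len = 4K chunks, the first call has
 * @decompressed = 0 and covers extent offsets [0, 4K).  If the current bvec
 * maps extent offset 8K (bvec_offset = 8K), then decompressed + buf_len =
 * 4K <= 8K, so the call returns 1 and the caller keeps decompressing; the
 * third call (@decompressed = 8K) finally overlaps the bvec and copies
 * min(8K + bv_len, 12K) - 8K bytes into it.
 */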
/*
 * Shannon Entropy calculation.
 *
 * Pure byte distribution analysis fails to determine compressibility of data.
 * Try calculating entropy to estimate the average minimum number of bits
 * needed to encode the sampled data.
 *
 * For convenience, return the percentage of needed bits, instead of amount of
 * bits directly.
 *
 * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy
 *			    and can be compressible with high probability
 *
 * @ENTROPY_LVL_HIGH - data are not compressible with high probability
 *
 * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
 */
#define ENTROPY_LVL_ACEPTABLE		(65)
#define ENTROPY_LVL_HIGH		(80)

/*
 * For increased precision in shannon_entropy calculation,
 * let's do pow(n, M) to save more digits after the decimal point:
 *
 * - maximum int bit length is 64
 * - ilog2(MAX_SAMPLE_SIZE)	-> 13
 * - 13 * 4 = 52 < 64		-> M = 4
 *
 * So use pow(n, 4).
 */
static inline u32 ilog2_w(u64 n)
{
	return ilog2(n * n * n * n);
}

static u32 shannon_entropy(struct heuristic_ws *ws)
{
	const u32 entropy_max = 8 * ilog2_w(2);
	u32 entropy_sum = 0;
	u32 p, p_base, sz_base;
	u32 i;

	sz_base = ilog2_w(ws->sample_size);
	for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
		p = ws->bucket[i].count;
		p_base = ilog2_w(p);
		entropy_sum += p * (sz_base - p_base);
	}

	entropy_sum /= ws->sample_size;
	return entropy_sum * 100 / entropy_max;
}
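/*
 * How the integer math above maps to the textbook definition: the Shannon
 * entropy of the sample is H = -sum(p_i * log2(p_i)) bits/byte, with
 * p_i = count_i / sample_size.  Using ilog2_w(n) ~= 4 * log2(n), the loop
 * sums count_i * 4 * (log2(sample_size) - log2(count_i)); dividing by
 * sample_size gives roughly 4 * H.  Since entropy_max = 8 * ilog2_w(2) =
 * 32 = 4 * 8, the returned value is the percentage of the maximum possible
 * 8 bits/byte of entropy.
 */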
The distribution can be uniform and counts in all buckets will be * nearly the same (eg. encrypted data). Unlikely to be compressible. * * Another possibility is a normal (Gaussian) distribution, where the data could * be potentially compressible, but we have to take a few more steps to decide * how much. * * @BYTE_CORE_SET_LOW - main part of byte values repeated frequently, * a compression algo can easily fix that * @BYTE_CORE_SET_HIGH - data have a uniform distribution and with high * probability are not compressible */ #define BYTE_CORE_SET_LOW (64) #define BYTE_CORE_SET_HIGH (200) static int byte_core_set_size(struct heuristic_ws *ws) { u32 i; u32 coreset_sum = 0; const u32 core_set_threshold = ws->sample_size * 90 / 100; struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE); for (i = 0; i < BYTE_CORE_SET_LOW; i++) coreset_sum += bucket[i].count; if (coreset_sum > core_set_threshold) return i; for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) { coreset_sum += bucket[i].count; if (coreset_sum > core_set_threshold) break; } return i; } /* * Count byte values in buckets. * This heuristic can detect textual data (configs, xml, json, html, etc). * Because in most text-like data the byte set is restricted to a limited * number of possible characters, and that restriction in most cases makes * the data easy to compress. * * @BYTE_SET_THRESHOLD - consider all data within this byte set size: * less - compressible * more - need additional analysis */ #define BYTE_SET_THRESHOLD (64) static u32 byte_set_size(const struct heuristic_ws *ws) { u32 i; u32 byte_set_size = 0; for (i = 0; i < BYTE_SET_THRESHOLD; i++) { if (ws->bucket[i].count > 0) byte_set_size++; } /* * Continue collecting the count of byte values in buckets. If the byte * set size is bigger than the threshold, it's pointless to continue, * the detection technique would fail for this type of data. */ for (; i < BUCKET_SIZE; i++) { if (ws->bucket[i].count > 0) { byte_set_size++; if (byte_set_size > BYTE_SET_THRESHOLD) return byte_set_size; } } return byte_set_size; } static bool sample_repeated_patterns(struct heuristic_ws *ws) { const u32 half_of_sample = ws->sample_size / 2; const u8 *data = ws->sample; return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0; } static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, struct heuristic_ws *ws) { struct page *page; pgoff_t index, index_end; u32 i, curr_sample_pos; u8 *in_data; /* * Compression handles the input data in chunks of 128KiB * (defined by BTRFS_MAX_UNCOMPRESSED) * * We do the same for the heuristic and loop over the whole range. * * MAX_SAMPLE_SIZE - calculated under the assumption that the heuristic will * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
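* * For example, assuming the sampling parameters defined earlier in this file (not shown in this excerpt) are 16-byte reads taken every 256 bytes, one full 128KiB (BTRFS_MAX_UNCOMPRESSED) range yields at most 131072 / 256 * 16 = 8192 bytes of sample.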
*/ if (end - start > BTRFS_MAX_UNCOMPRESSED) end = start + BTRFS_MAX_UNCOMPRESSED; index = start >> PAGE_SHIFT; index_end = end >> PAGE_SHIFT; /* Don't miss the unaligned end */ if (!PAGE_ALIGNED(end)) index_end++; curr_sample_pos = 0; while (index < index_end) { page = find_get_page(inode->i_mapping, index); in_data = kmap_local_page(page); /* Handle the case where the start is not aligned to PAGE_SIZE */ i = start % PAGE_SIZE; while (i < PAGE_SIZE - SAMPLING_READ_SIZE) { /* Don't sample any garbage from the last page */ if (start > end - SAMPLING_READ_SIZE) break; memcpy(&ws->sample[curr_sample_pos], &in_data[i], SAMPLING_READ_SIZE); i += SAMPLING_INTERVAL; start += SAMPLING_INTERVAL; curr_sample_pos += SAMPLING_READ_SIZE; } kunmap_local(in_data); put_page(page); index++; } ws->sample_size = curr_sample_pos; } /* * Compression heuristic. * * The following types of analysis can be performed: * - detect mostly zero data * - detect data with a low "byte set" size (text, etc) * - detect data with a low/high "core byte" set * * Return non-zero if the compression should be done, 0 otherwise. */ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct list_head *ws_list = get_workspace(fs_info, 0, 0); struct heuristic_ws *ws; u32 i; u8 byte; int ret = 0; ws = list_entry(ws_list, struct heuristic_ws, list); heuristic_collect_sample(&inode->vfs_inode, start, end, ws); if (sample_repeated_patterns(ws)) { ret = 1; goto out; } memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE); for (i = 0; i < ws->sample_size; i++) { byte = ws->sample[i]; ws->bucket[byte].count++; } i = byte_set_size(ws); if (i < BYTE_SET_THRESHOLD) { ret = 2; goto out; } i = byte_core_set_size(ws); if (i <= BYTE_CORE_SET_LOW) { ret = 3; goto out; } if (i >= BYTE_CORE_SET_HIGH) { ret = 0; goto out; } i = shannon_entropy(ws); if (i <= ENTROPY_LVL_ACEPTABLE) { ret = 4; goto out; } /* * For the levels below ENTROPY_LVL_HIGH, additional analysis would be * needed to give a green light to compression. * * For now just assume that compression at that level is not worth the * resources because: * * 1. it is possible to defrag the data later * * 2. the data would turn out to be hardly compressible, eg. 150 byte * values, every bucket has a counter at level ~54. The heuristic would * be confused. This can happen when data have some internal repeated * patterns like "abbacbbc...". This can be detected by analyzing * pairs of bytes, which is too costly. */ if (i < ENTROPY_LVL_HIGH) { ret = 5; goto out; } else { ret = 0; goto out; } out: put_workspace(fs_info, 0, ws_list); return ret; } /* * Convert the compression suffix (eg. after "zlib" starting with ":") to a level. * * If the resulting level exceeds the algo's supported levels, it will be clamped. * * Return <0 if no valid string can be found. * Return 0 if everything is fine. */ int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret) { int level = 0; int ret; if (!type) { *level_ret = btrfs_compress_set_level(type, level); return 0; } if (str[0] == ':') { ret = kstrtoint(str + 1, 10, &level); if (ret) return ret; } *level_ret = btrfs_compress_set_level(type, level); return 0; }
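/*
 * Editorial sketch, not part of the kernel sources above: a self-contained
 * userspace rendering of the integer-only entropy estimate that
 * shannon_entropy() implements, using the same pow(n, 4) + ilog2() trick.
 * A result at or below ENTROPY_LVL_ACEPTABLE (65) corresponds to the
 * "compress it" path above. All names below (ilog2_u64, ilog2_w4,
 * entropy_percent) are illustrative stand-ins, not kernel symbols.
 */
#include <stdint.h>

static inline uint32_t ilog2_u64(uint64_t n)
{
	uint32_t r = 0;

	while (n >>= 1)
		r++;
	return r;
}

/* pow(n, 4) keeps extra precision that a bare ilog2() would throw away */
static inline uint32_t ilog2_w4(uint64_t n)
{
	return ilog2_u64(n * n * n * n);
}

/* Percentage of the 8 bits/byte maximum needed to encode the sample */
static uint32_t entropy_percent(const uint8_t *sample, uint32_t len)
{
	const uint32_t entropy_max = 8 * ilog2_w4(2);
	uint32_t bucket[256] = { 0 };
	uint32_t sum = 0;
	uint32_t sz_base = ilog2_w4(len);

	for (uint32_t i = 0; i < len; i++)
		bucket[sample[i]]++;

	/* sum of p * (log2(len^4) - log2(p^4)) over all non-empty buckets */
	for (uint32_t i = 0; i < 256; i++)
		if (bucket[i])
			sum += bucket[i] * (sz_base - ilog2_w4(bucket[i]));

	sum /= len;
	return sum * 100 / entropy_max;
}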
// SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/errno.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/udp.h> #include <linux/icmpv6.h> #include <linux/types.h> #include <linux/kernel.h> #include <net/fou.h> #include <net/ip.h> #include <net/ip6_tunnel.h> #include <net/ip6_checksum.h> #include <net/protocol.h> #include <net/udp.h> #include <net/udp_tunnel.h> #if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, struct flowi6 *fl6, u8 *protocol, __be16 sport) { struct udphdr *uh; skb_push(skb, sizeof(struct udphdr)); skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->dest = e->dport; uh->source = sport; uh->len = htons(skb->len); udp6_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM6), skb, &fl6->saddr, &fl6->daddr, skb->len); *protocol = IPPROTO_UDP; } static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; err = __fou_build_header(skb, e, protocol, &sport, type); if (err) return err; fou6_build_udp(skb, e, fl6, protocol, sport); return 0; } static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6) { __be16 sport; int err; int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ?
SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; err = __gue_build_header(skb, e, protocol, &sport, type); if (err) return err; fou6_build_udp(skb, e, fl6, protocol, sport); return 0; } static int gue6_err_proto_handler(int proto, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[proto]); if (ipprot && ipprot->err_handler) { if (!ipprot->err_handler(skb, opt, type, code, offset, info)) return 0; } return -ENOENT; } static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; size_t len, optlen; int ret; len = sizeof(struct udphdr) + sizeof(struct guehdr); if (!pskb_may_pull(skb, transport_offset + len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; switch (guehdr->version) { case 0: /* Full GUE header present */ break; case 1: { /* Direct encapsulation of IPv4 or IPv6 */ skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); switch (((struct iphdr *)guehdr)->version) { case 4: ret = gue6_err_proto_handler(IPPROTO_IPIP, skb, opt, type, code, offset, info); goto out; case 6: ret = gue6_err_proto_handler(IPPROTO_IPV6, skb, opt, type, code, offset, info); goto out; default: ret = -EOPNOTSUPP; goto out; } } default: /* Undefined version */ return -EOPNOTSUPP; } if (guehdr->control) return -ENOENT; optlen = guehdr->hlen << 2; if (!pskb_may_pull(skb, transport_offset + len + optlen)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; /* Handling exceptions for direct UDP encapsulation in GUE would lead to * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ if (guehdr->proto_ctype == IPPROTO_UDP || guehdr->proto_ctype == IPPROTO_UDPLITE) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); ret = gue6_err_proto_handler(guehdr->proto_ctype, skb, opt, type, code, offset, info); out: skb_set_transport_header(skb, transport_offset); return ret; } static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou6_build_header, .err_handler = gue6_err, }; static const struct ip6_tnl_encap_ops gue_ip6tun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue6_build_header, .err_handler = gue6_err, }; static int ip6_tnl_encap_add_fou_ops(void) { int ret; ret = ip6_tnl_encap_add_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); if (ret < 0) { pr_err("can't add fou6 ops\n"); return ret; } ret = ip6_tnl_encap_add_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); if (ret < 0) { pr_err("can't add gue6 ops\n"); ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); return ret; } return 0; } static void ip6_tnl_encap_del_fou_ops(void) { ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); ip6_tnl_encap_del_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); } #else static int ip6_tnl_encap_add_fou_ops(void) { return 0; } static void ip6_tnl_encap_del_fou_ops(void) { } #endif static int __init fou6_init(void) { int ret; ret = ip6_tnl_encap_add_fou_ops(); return ret; } static void __exit fou6_fini(void) { ip6_tnl_encap_del_fou_ops(); } module_init(fou6_init); module_exit(fou6_fini); MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Foo over UDP (IPv6)");
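/*
 * Editorial sketch, not from the kernel tree: a userspace illustration of the
 * header set-up that fou6_build_udp() performs above - prepend a UDP header
 * whose length field covers the whole encapsulated payload, with both ports
 * in network byte order. The checksum is left at zero here; the kernel fills
 * it in via udp6_set_csum(). The names udp_hdr_w and fou_push_udp are
 * hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct udp_hdr_w {
	uint16_t source;
	uint16_t dest;
	uint16_t len;
	uint16_t check;
};

/* Prepend a UDP header in front of payload_len bytes already in buf */
static size_t fou_push_udp(uint8_t *buf, size_t payload_len,
			   uint16_t sport, uint16_t dport)
{
	struct udp_hdr_w uh;

	uh.source = htons(sport);
	uh.dest = htons(dport);
	uh.len = htons((uint16_t)(sizeof(uh) + payload_len));
	uh.check = 0;	/* the kernel computes this with udp6_set_csum() */

	/*
	 * skb_push() grows into pre-reserved headroom; a plain buffer has
	 * to move the payload to make room for the header instead.
	 */
	memmove(buf + sizeof(uh), buf, payload_len);
	memcpy(buf, &uh, sizeof(uh));
	return sizeof(uh) + payload_len;
}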
// SPDX-License-Identifier: GPL-2.0-only /* * Generic HDLC support routines for Linux * * Copyright (C) 1999 - 2008 Krzysztof Halasa <khc@pm.waw.pl> * * Currently supported: * * raw IP-in-HDLC * * Cisco HDLC * * Frame Relay with ANSI or CCITT LMI (both user and network side) * * PPP * * X.25 * * Use sethdlc utility to set line parameters, protocol and PVCs * * How does it work: * - proto->open(), close(), start(), stop() calls are serialized. * The order is: open, [ start, stop ... ] close ... * - proto->start() and stop() are called with spin_lock_irq held.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> #include <linux/hdlc.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/pkt_sched.h> #include <linux/poll.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/net_namespace.h> static const char *version = "HDLC support module revision 1.22"; #undef DEBUG_LINK static struct hdlc_proto *first_proto; static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { struct hdlc_device *hdlc; /* First make sure "dev" is an HDLC device */ if (!(dev->priv_flags & IFF_WAN_HDLC)) { kfree_skb(skb); return NET_RX_SUCCESS; } hdlc = dev_to_hdlc(dev); if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); return 0; } BUG_ON(!hdlc->proto->netif_rx); return hdlc->proto->netif_rx(skb); } netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto->xmit) return hdlc->proto->xmit(skb, dev); return hdlc->xmit(skb, dev); /* call hardware driver directly */ } EXPORT_SYMBOL(hdlc_start_xmit); static inline void hdlc_proto_start(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto->start) hdlc->proto->start(dev); } static inline void hdlc_proto_stop(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto->stop) hdlc->proto->stop(dev); } static int hdlc_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); hdlc_device *hdlc; unsigned long flags; int on; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (!(dev->priv_flags & IFF_WAN_HDLC)) return NOTIFY_DONE; /* not an HDLC device */ if (event != NETDEV_CHANGE) return NOTIFY_DONE; /* Only interested in carrier changes */ on = netif_carrier_ok(dev); #ifdef DEBUG_LINK printk(KERN_DEBUG "%s: hdlc_device_event NETDEV_CHANGE, carrier %i\n", dev->name, on); #endif hdlc = dev_to_hdlc(dev); spin_lock_irqsave(&hdlc->state_lock, flags); if (hdlc->carrier == on) goto carrier_exit; /* no change in DCD line level */ hdlc->carrier = on; if (!hdlc->open) goto carrier_exit; if (hdlc->carrier) { netdev_info(dev, "Carrier detected\n"); hdlc_proto_start(dev); } else { netdev_info(dev, "Carrier lost\n"); hdlc_proto_stop(dev); } carrier_exit: spin_unlock_irqrestore(&hdlc->state_lock, flags); return NOTIFY_DONE; } /* Must be called by hardware driver when HDLC device is being opened */ int hdlc_open(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); #ifdef DEBUG_LINK printk(KERN_DEBUG "%s: hdlc_open() carrier %i open %i\n", dev->name, hdlc->carrier, hdlc->open); #endif if (!hdlc->proto) return -ENOSYS; /* no protocol attached */ if (hdlc->proto->open) { int result = hdlc->proto->open(dev); if (result) return result; } spin_lock_irq(&hdlc->state_lock); if (hdlc->carrier) { netdev_info(dev, "Carrier detected\n"); hdlc_proto_start(dev); } else { netdev_info(dev, "No carrier\n"); } hdlc->open = 1; spin_unlock_irq(&hdlc->state_lock); return 0; } EXPORT_SYMBOL(hdlc_open); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); #ifdef DEBUG_LINK printk(KERN_DEBUG "%s: hdlc_close() carrier %i open %i\n", dev->name, hdlc->carrier, hdlc->open); #endif spin_lock_irq(&hdlc->state_lock); 
hdlc->open = 0; if (hdlc->carrier) hdlc_proto_stop(dev); spin_unlock_irq(&hdlc->state_lock); if (hdlc->proto->close) hdlc->proto->close(dev); } EXPORT_SYMBOL(hdlc_close); int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs) { struct hdlc_proto *proto = first_proto; int result; if (dev_to_hdlc(dev)->proto) { result = dev_to_hdlc(dev)->proto->ioctl(dev, ifs); if (result != -EINVAL) return result; } /* Not handled by currently attached protocol (if any) */ while (proto) { result = proto->ioctl(dev, ifs); if (result != -EINVAL) return result; proto = proto->next; } return -EINVAL; } EXPORT_SYMBOL(hdlc_ioctl); static const struct header_ops hdlc_null_ops; static void hdlc_setup_dev(struct net_device *dev) { /* Re-init all variables changed by HDLC protocol drivers, * including ether_setup() called from hdlc_raw_eth.c. */ dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->priv_flags = IFF_WAN_HDLC; dev->mtu = HDLC_MAX_MTU; dev->min_mtu = 68; dev->max_mtu = HDLC_MAX_MTU; dev->type = ARPHRD_RAWHDLC; dev->hard_header_len = 0; dev->needed_headroom = 0; dev->addr_len = 0; dev->header_ops = &hdlc_null_ops; } static void hdlc_setup(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); hdlc_setup_dev(dev); hdlc->carrier = 1; hdlc->open = 0; spin_lock_init(&hdlc->state_lock); } struct net_device *alloc_hdlcdev(void *priv) { struct net_device *dev; dev = alloc_netdev(sizeof(struct hdlc_device), "hdlc%d", NET_NAME_UNKNOWN, hdlc_setup); if (dev) dev_to_hdlc(dev)->priv = priv; return dev; } EXPORT_SYMBOL(alloc_hdlcdev); void unregister_hdlc_device(struct net_device *dev) { rtnl_lock(); detach_hdlc_protocol(dev); unregister_netdevice(dev); rtnl_unlock(); } EXPORT_SYMBOL(unregister_hdlc_device); int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size) { int err; err = detach_hdlc_protocol(dev); if (err) return err; if (!try_module_get(proto->module)) return -ENOSYS; if (size) { dev_to_hdlc(dev)->state = kmalloc(size, GFP_KERNEL); if (!dev_to_hdlc(dev)->state) { module_put(proto->module); return -ENOBUFS; } } dev_to_hdlc(dev)->proto = proto; return 0; } EXPORT_SYMBOL(attach_hdlc_protocol); int detach_hdlc_protocol(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); int err; if (hdlc->proto) { err = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev); err = notifier_to_errno(err); if (err) { netdev_err(dev, "Refused to change device type\n"); return err; } if (hdlc->proto->detach) hdlc->proto->detach(dev); module_put(hdlc->proto->module); hdlc->proto = NULL; } kfree(hdlc->state); hdlc->state = NULL; hdlc_setup_dev(dev); return 0; } EXPORT_SYMBOL(detach_hdlc_protocol); void register_hdlc_protocol(struct hdlc_proto *proto) { rtnl_lock(); proto->next = first_proto; first_proto = proto; rtnl_unlock(); } EXPORT_SYMBOL(register_hdlc_protocol); void unregister_hdlc_protocol(struct hdlc_proto *proto) { struct hdlc_proto **p; rtnl_lock(); p = &first_proto; while (*p != proto) { BUG_ON(!*p); p = &((*p)->next); } *p = proto->next; rtnl_unlock(); } EXPORT_SYMBOL(unregister_hdlc_protocol); MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>"); MODULE_DESCRIPTION("HDLC support module"); MODULE_LICENSE("GPL v2"); static struct packet_type hdlc_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_HDLC), .func = hdlc_rcv, }; static struct notifier_block hdlc_notifier = { .notifier_call = hdlc_device_event, }; static int __init hdlc_module_init(void) { int result; pr_info("%s\n", version); result = register_netdevice_notifier(&hdlc_notifier); if (result) return result; 
dev_add_pack(&hdlc_packet_type); return 0; } static void __exit hdlc_module_exit(void) { dev_remove_pack(&hdlc_packet_type); unregister_netdevice_notifier(&hdlc_notifier); } module_init(hdlc_module_init); module_exit(hdlc_module_exit);
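/*
 * Editorial sketch, not part of the file above: roughly how a protocol driver
 * would plug into this framework. Only the hooks that hdlc.c itself
 * dereferences are filled in (ioctl for hdlc_ioctl(), netif_rx for hdlc_rcv(),
 * module for attach_hdlc_protocol()); everything else about struct hdlc_proto
 * is left at defaults, and all "dummy" names are hypothetical.
 */
static int dummy_ioctl(struct net_device *dev, struct if_settings *ifs)
{
	/* -EINVAL tells hdlc_ioctl() to keep walking the protocol list */
	return -EINVAL;
}

static int dummy_netif_rx(struct sk_buff *skb)
{
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct hdlc_proto dummy_proto = {
	.ioctl = dummy_ioctl,
	.netif_rx = dummy_netif_rx,
	.module = THIS_MODULE,
};

static int __init dummy_proto_init(void)
{
	register_hdlc_protocol(&dummy_proto);
	return 0;
}

static void __exit dummy_proto_exit(void)
{
	unregister_hdlc_protocol(&dummy_proto);
}

module_init(dummy_proto_init);
module_exit(dummy_proto_exit);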
// SPDX-License-Identifier: GPL-2.0 /* * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum * (SSE2 accelerated version) * * Copyright 2018 Google LLC */ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> #include <linux/sizes.h> #include <asm/simd.h> asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2); kernel_fpu_end(); src += n; srclen -= n; } while (srclen); return 0; } static int nhpoly1305_sse2_digest(struct shash_desc *desc, const u8 *src, unsigned int srclen, u8 *out) { return crypto_nhpoly1305_init(desc) ?: nhpoly1305_sse2_update(desc, src, srclen) ?: crypto_nhpoly1305_final(desc, out); } static struct shash_alg nhpoly1305_alg = { .base.cra_name = "nhpoly1305", .base.cra_driver_name = "nhpoly1305-sse2", .base.cra_priority = 200, .base.cra_ctxsize = sizeof(struct nhpoly1305_key), .base.cra_module = THIS_MODULE, .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_nhpoly1305_init, .update = nhpoly1305_sse2_update, .final = crypto_nhpoly1305_final, .digest = nhpoly1305_sse2_digest, .setkey = crypto_nhpoly1305_setkey, .descsize = sizeof(struct nhpoly1305_state), }; static int __init nhpoly1305_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2)) return -ENODEV; return crypto_register_shash(&nhpoly1305_alg); } static void __exit nhpoly1305_mod_exit(void) { crypto_unregister_shash(&nhpoly1305_alg); } module_init(nhpoly1305_mod_init); module_exit(nhpoly1305_mod_exit); MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); MODULE_ALIAS_CRYPTO("nhpoly1305"); MODULE_ALIAS_CRYPTO("nhpoly1305-sse2");
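/*
 * Editorial sketch, not taken from this driver: the generic shash usage
 * pattern through which a kernel caller would reach the "nhpoly1305"
 * algorithm registered above (the crypto core picks the SSE2 driver when
 * its cra_priority wins). The function name nhpoly1305_digest_example is
 * hypothetical; the crypto API calls are standard.
 */
#include <crypto/hash.h>

static int nhpoly1305_digest_example(const u8 *key, /* NHPOLY1305_KEY_SIZE bytes */
				     const u8 *data, unsigned int len,
				     u8 *out /* POLY1305_DIGEST_SIZE bytes */)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("nhpoly1305", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, NHPOLY1305_KEY_SIZE);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, data, len, out);
	}
	crypto_free_shash(tfm);
	return err;
}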
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal.h: mm/ internal definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H #include <linux/fs.h> #include <linux/khugepaged.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/pagemap.h> #include <linux/pagewalk.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/leafops.h> #include <linux/swap_cgroup.h> #include <linux/tracepoint-defs.h> /* Internal core VMA manipulation functions. */ #include "vma.h" struct folio_batch; /* * Maintains state across a page table move. The operation assumes both source * and destination VMAs already exist and are specified by the user. * * Partial moves are permitted, but the old and new ranges must both reside * within a VMA. * * mmap lock must be held in write and VMA write locks must be held on any VMA * that is visible. * * Use the PAGETABLE_MOVE() macro to initialise this struct. * * The old_addr and new_addr fields are updated as the page table move is * executed. * * NOTE: The page table move is affected by reading from [old_addr, old_end), * and old_addr may be updated for better page table alignment, so len_in * represents the length of the range being copied as specified by the user. */ struct pagetable_move_control { struct vm_area_struct *old; /* Source VMA. */ struct vm_area_struct *new; /* Destination VMA. */ unsigned long old_addr; /* Address from which the move begins. */ unsigned long old_end; /* Exclusive address at which old range ends. */ unsigned long new_addr; /* Address to move page tables to. */ unsigned long len_in; /* Bytes to remap specified by user. */ bool need_rmap_locks; /* Do rmap locks need to be taken? */ bool for_stack; /* Is this an early temp stack being moved? */ }; #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ struct pagetable_move_control name = { \ .old = old_, \ .new = new_, \ .old_addr = old_addr_, \ .old_end = (old_addr_) + (len_), \ .new_addr = new_addr_, \ .len_in = len_, \ } /* * The set of flags that only affect watermark checking and reclaim * behaviour. This is used by the MM to obey the caller constraints * about IO, FS and watermark checking while ignoring placement * hints such as HIGHMEM usage.
*/ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_NOLOCKDEP) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation cpuset and node placement constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) /* * Different from WARN_ON_ONCE(), no warning will be issued * when we specify __GFP_NOWARN. */ #define WARN_ON_ONCE_GFP(cond, gfp) ({ \ static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ __warned = true; \ WARN_ON(1); \ } \ unlikely(__ret_warn_once); \ }) void page_writeback_init(void); /* * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages, * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently * leaves nr_pages_mapped at 0, but avoid surprise if it participates later. */ #define ENTIRELY_MAPPED 0x800000 #define FOLIO_PAGES_MAPPED (ENTIRELY_MAPPED - 1) /* * Flags passed to __show_mem() and show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ /* * How many individual pages have an elevated _mapcount. Excludes * the folio's entire_mapcount. * * Don't use this function outside of debugging code. */ static inline int folio_nr_pages_mapped(const struct folio *folio) { if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) return -1; return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; } /* * Retrieve the first entry of a folio based on a provided entry within the * folio. We cannot rely on folio->swap as there is no guarantee that it has * been initialized. Used for calling arch_swap_restore(). */ static inline swp_entry_t folio_swap(swp_entry_t entry, const struct folio *folio) { swp_entry_t swap = { .val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)), }; return swap; } static inline void *folio_raw_mapping(const struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; return (void *)(mapping & ~FOLIO_MAPPING_FLAGS); } /* * This is a file-backed mapping, and is about to be memory mapped - invoke its * mmap hook and safely handle error conditions. On error, VMA hooks will be * mutated. * * @file: File which backs the mapping. * @vma: VMA which we are mapping. * * Returns: 0 if success, error otherwise. */ static inline int mmap_file(struct file *file, struct vm_area_struct *vma) { int err = vfs_mmap(file, vma); if (likely(!err)) return 0; /* * OK, we tried to call the file hook for mmap(), but an error * arose. The mapping is in an inconsistent state and we must not invoke * any further hooks on it. */ vma->vm_ops = &vma_dummy_vm_ops; return err; } /* * If the VMA has a close hook then close it, and since closing it might leave * it in an inconsistent state which makes the use of any hooks suspect, clear * them down by installing dummy empty hooks. */ static inline void vma_close(struct vm_area_struct *vma) { if (vma->vm_ops && vma->vm_ops->close) { vma->vm_ops->close(vma); /* * The mapping is in an inconsistent state, and no further hooks * may be invoked upon it.
*/ vma->vm_ops = &vma_dummy_vm_ops; } } #ifdef CONFIG_MMU /* Flags for folio_pte_batch(). */ typedef int __bitwise fpb_t; /* Compare PTEs respecting the dirty bit. */ #define FPB_RESPECT_DIRTY ((__force fpb_t)BIT(0)) /* Compare PTEs respecting the soft-dirty bit. */ #define FPB_RESPECT_SOFT_DIRTY ((__force fpb_t)BIT(1)) /* Compare PTEs respecting the writable bit. */ #define FPB_RESPECT_WRITE ((__force fpb_t)BIT(2)) /* * Merge PTE write bits: if any PTE in the batch is writable, modify the * PTE at @ptentp to be writable. */ #define FPB_MERGE_WRITE ((__force fpb_t)BIT(3)) /* * Merge PTE young and dirty bits: if any PTE in the batch is young or dirty, * modify the PTE at @ptentp to be young or dirty, respectively. */ #define FPB_MERGE_YOUNG_DIRTY ((__force fpb_t)BIT(4)) static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags) { if (!(flags & FPB_RESPECT_DIRTY)) pte = pte_mkclean(pte); if (likely(!(flags & FPB_RESPECT_SOFT_DIRTY))) pte = pte_clear_soft_dirty(pte); if (likely(!(flags & FPB_RESPECT_WRITE))) pte = pte_wrprotect(pte); return pte_mkold(pte); } /** * folio_pte_batch_flags - detect a PTE batch for a large folio * @folio: The large folio to detect a PTE batch for. * @vma: The VMA. Only relevant with FPB_MERGE_WRITE, otherwise can be NULL. * @ptep: Page table pointer for the first entry. * @ptentp: Pointer to a COPY of the first page table entry whose flags this * function updates based on @flags if appropriate. * @max_nr: The maximum number of table entries to consider. * @flags: Flags to modify the PTE batch semantics. * * Detect a PTE batch: consecutive (present) PTEs that map consecutive * pages of the same large folio in a single VMA and a single page table. * * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN, * the accessed bit, writable bit, dirty bit (unless FPB_RESPECT_DIRTY is set) * and soft-dirty bit (unless FPB_RESPECT_SOFT_DIRTY is set). * * @ptep must map any page of the folio. max_nr must be at least one and * must be limited by the caller so scanning cannot exceed a single VMA and * a single page table. * * Depending on the FPB_MERGE_* flags, the pte stored at @ptentp will * be updated: it's crucial that a pointer to a COPY of the first * page table entry, obtained through ptep_get(), is provided as @ptentp. * * This function will be inlined to optimize based on the input parameters; * consider using folio_pte_batch() instead if applicable. * * Return: the number of table entries in the batch. */ static inline unsigned int folio_pte_batch_flags(struct folio *folio, struct vm_area_struct *vma, pte_t *ptep, pte_t *ptentp, unsigned int max_nr, fpb_t flags) { bool any_writable = false, any_young = false, any_dirty = false; pte_t expected_pte, pte = *ptentp; unsigned int nr, cur_nr; VM_WARN_ON_FOLIO(!pte_present(pte), folio); VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); /* * Ensure this is a pointer to a copy not a pointer into a page table. * If this is a stack value, it won't be a valid virtual address, but * that's fine because it also cannot be pointing into the page table. */ VM_WARN_ON(virt_addr_valid(ptentp) && PageTable(virt_to_page(ptentp))); /* Limit max_nr to the actual remaining PFNs in the folio we could batch. 
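* For example, if the first PTE of the batch maps page 12 of a 16-page folio, at most 16 - 12 = 4 PTEs can be batched here, regardless of the max_nr passed in.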
*/ max_nr = min_t(unsigned long, max_nr, folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte)); nr = pte_batch_hint(ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = ptep + nr; while (nr < max_nr) { pte = ptep_get(ptep); if (!pte_same(__pte_batch_clear_ignored(pte, flags), expected_pte)) break; if (flags & FPB_MERGE_WRITE) any_writable |= pte_write(pte); if (flags & FPB_MERGE_YOUNG_DIRTY) { any_young |= pte_young(pte); any_dirty |= pte_dirty(pte); } cur_nr = pte_batch_hint(ptep, pte); expected_pte = pte_advance_pfn(expected_pte, cur_nr); ptep += cur_nr; nr += cur_nr; } if (any_writable) *ptentp = pte_mkwrite(*ptentp, vma); if (any_young) *ptentp = pte_mkyoung(*ptentp); if (any_dirty) *ptentp = pte_mkdirty(*ptentp); return min(nr, max_nr); } unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte, unsigned int max_nr); /** * pte_move_swp_offset - Move the swap entry offset field of a swap pte * forward or backward by delta * @pte: The initial pte state; must be a swap entry * @delta: The direction and the offset we are moving; forward if delta * is positive; backward if delta is negative * * Moves the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned. */ static inline pte_t pte_move_swp_offset(pte_t pte, long delta) { const softleaf_t entry = softleaf_from_pte(pte); pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry), (swp_offset(entry) + delta))); if (pte_swp_soft_dirty(pte)) new = pte_swp_mksoft_dirty(new); if (pte_swp_exclusive(pte)) new = pte_swp_mkexclusive(new); if (pte_swp_uffd_wp(pte)) new = pte_swp_mkuffd_wp(new); return new; } /** * pte_next_swp_offset - Increment the swap entry offset field of a swap pte. * @pte: The initial pte state; must be a swap entry. * * Increments the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned. */ static inline pte_t pte_next_swp_offset(pte_t pte) { return pte_move_swp_offset(pte, 1); } /** * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries * @start_ptep: Page table pointer for the first entry. * @max_nr: The maximum number of table entries to consider. * @pte: Page table entry for the first entry. * * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs * containing swap entries all with consecutive offsets and targeting the same * swap type, all with matching swp pte bits. * * max_nr must be at least one and must be limited by the caller so scanning * cannot exceed a single page table. * * Return: the number of table entries in the batch. 
*/ static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte) { pte_t expected_pte = pte_next_swp_offset(pte); const pte_t *end_ptep = start_ptep + max_nr; const softleaf_t entry = softleaf_from_pte(pte); pte_t *ptep = start_ptep + 1; unsigned short cgroup_id; VM_WARN_ON(max_nr < 1); VM_WARN_ON(!softleaf_is_swap(entry)); cgroup_id = lookup_swap_cgroup_id(entry); while (ptep < end_ptep) { softleaf_t entry; pte = ptep_get(ptep); if (!pte_same(pte, expected_pte)) break; entry = softleaf_from_pte(pte); if (lookup_swap_cgroup_id(entry) != cgroup_id) break; expected_pte = pte_next_swp_offset(expected_pte); ptep++; } return ptep - start_ptep; } #endif /* CONFIG_MMU */ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, int nr_throttled); static inline void acct_reclaim_writeback(struct folio *folio) { pg_data_t *pgdat = folio_pgdat(folio); int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); if (nr_throttled) __acct_reclaim_writeback(pgdat, folio, nr_throttled); } static inline void wake_throttle_isolated(pg_data_t *pgdat) { wait_queue_head_t *wqh; wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; if (waitqueue_active(wqh)) wake_up(wqh); } vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf); static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) { vm_fault_t ret = __vmf_anon_prepare(vmf); if (unlikely(ret & VM_FAULT_RETRY)) vma_end_read(vmf->vma); return ret; } vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); bool __folio_end_writeback(struct folio *folio); void deactivate_file_folio(struct folio *folio); void folio_activate(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); struct zap_details; void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details); void zap_page_range_single_batched(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long size, struct zap_details *details); int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, gfp_t gfp); void page_cache_ra_order(struct readahead_control *, struct file_ra_state *); void force_page_cache_ra(struct readahead_control *, unsigned long nr); static inline void force_page_cache_readahead(struct address_space *mapping, struct file *file, pgoff_t index, unsigned long nr_to_read) { DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index); force_page_cache_ra(&ractl, nr_to_read); } unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct address_space *mapping, struct folio *folio); bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end); long mapping_evict_folio(struct address_space *mapping, struct folio *folio); unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_failed); /** * folio_evictable - Test whether a folio is evictable. * @folio: The folio to test. 
* * Test whether @folio is evictable -- i.e., should be placed on * active/inactive lists vs unevictable list. * * Reasons folio might not be evictable: * 1. folio's mapping marked unevictable * 2. One of the pages in the folio is part of an mlocked VMA */ static inline bool folio_evictable(struct folio *folio) { bool ret; /* Prevent address_space of inode and swap cache from being freed */ rcu_read_lock(); ret = !mapping_unevictable(folio_mapping(folio)) && !folio_test_mlocked(folio); rcu_read_unlock(); return ret; } /* * Turn a non-refcounted page (->_refcount == 0) into refcounted with * a count of one. */ static inline void set_page_refcounted(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(page_ref_count(page), page); set_page_count(page, 1); } /* * Return true if a folio needs ->release_folio() calling upon it. */ static inline bool folio_needs_release(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); return folio_has_private(folio) || (mapping && mapping_release_always(mapping)); } extern unsigned long highest_memmap_pfn; /* * Maximum number of reclaim retries without progress before the OOM * killer is considered the only way forward. */ #define MAX_RECLAIM_RETRIES 16 /* * in mm/vmscan.c: */ bool folio_isolate_lru(struct folio *folio); void folio_putback_lru(struct folio *folio); extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); #ifdef CONFIG_NUMA int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat); #else static inline int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat) { return 0; } #endif /* * in mm/rmap.c: */ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); /* * in mm/page_alloc.c */ #define K(x) ((x) << (PAGE_SHIFT-10)) extern char * const zone_names[MAX_NR_ZONES]; /* perform sanity checks on struct pages being allocated or freed */ DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); extern int min_free_kbytes; extern int defrag_mode; void setup_per_zone_wmarks(void); void calculate_min_free_kbytes(void); int __meminit init_per_zone_wmark_min(void); void page_alloc_sysctl_init(void); /* * Structure for holding the mostly immutable allocation parameters passed * between functions involved in allocations, including the alloc_pages* * family of functions. * * nodemask, migratetype and highest_zoneidx are initialized only once in * __alloc_pages() and then never change. * * zonelist, preferred_zone and highest_zoneidx are set first in * __alloc_pages() for the fast path, and might be later changed * in __alloc_pages_slowpath(). All other functions pass the whole structure * by a const pointer. */ struct alloc_context { struct zonelist *zonelist; nodemask_t *nodemask; struct zoneref *preferred_zoneref; int migratetype; /* * highest_zoneidx represents highest usable zone index of * the allocation request. Due to the nature of the zone, * memory on lower zone than the highest_zoneidx will be * protected by lowmem_reserve[highest_zoneidx]. * * highest_zoneidx is also used by reclaim/compaction to limit * the target zone since higher zone than this index cannot be * usable for this allocation request. */ enum zone_type highest_zoneidx; bool spread_dirty_pages; }; /* * This function returns the order of a free page in the buddy system. In * general, page_zone(page)->lock must be held by the caller to prevent the * page from being allocated in parallel and returning garbage as the order.
* If a caller does not hold page_zone(page)->lock, it must guarantee that the * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use buddy_order_unsafe() below. */ static inline unsigned int buddy_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); } /* * Like buddy_order(), but for callers who cannot afford to hold the zone lock. * PageBuddy() should be checked first by the caller to minimize race window, * and invalid values must be handled gracefully. * * READ_ONCE is used so that if the caller assigns the result into a local * variable and e.g. tests it for valid range before using, the compiler cannot * decide to remove the variable and inline the page_private(page) multiple * times, potentially observing different values in the tests and the actual * use of the result. */ #define buddy_order_unsafe(page) READ_ONCE(page_private(page)) /* * This function checks whether a page is free && is the buddy * we can coalesce a page and its buddy if * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. * * For recording whether a page is in the buddy system, we set PageBuddy. * Setting, clearing, and testing PageBuddy is serialized by zone->lock. * * For recording page's order, we use page_private(page). */ static inline bool page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { if (!page_is_guard(buddy) && !PageBuddy(buddy)) return false; if (buddy_order(buddy) != order) return false; /* * zone check is done late to avoid uselessly calculating * zone/node ids for pages that could never merge. */ if (page_zone_id(page) != page_zone_id(buddy)) return false; VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return true; } /* * Locate the struct page for both the matching buddy in our * pair (buddy1) and the combined O(n+1) page they form (page). * * 1) Any buddy B1 will have an order O twin B2 which satisfies * the following equation: * B2 = B1 ^ (1 << O) * For example, if the starting buddy (buddy2) is #8 its order * 1 buddy is #10: * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 * * 2) Any buddy B will have an order O+1 parent P which * satisfies the following equation: * P = B & ~(1 << O) * * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER */ static inline unsigned long __find_buddy_pfn(unsigned long page_pfn, unsigned int order) { return page_pfn ^ (1 << order); } /* * Find the buddy of @page and validate it. * @page: The input page * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the * function is used in the performance-critical __free_one_page(). * @order: The order of the page * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to * page_to_pfn(). * * The found buddy can be a non PageBuddy, out of @page's zone, or its order is * not the same as @page's. The validation is necessary before using it. * * Return: the found buddy page or NULL if not found.
*/ static inline struct page *find_buddy_page_pfn(struct page *page, unsigned long pfn, unsigned int order, unsigned long *buddy_pfn) { unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order); struct page *buddy; buddy = page + (__buddy_pfn - pfn); if (buddy_pfn) *buddy_pfn = __buddy_pfn; if (page_is_buddy(page, buddy, order)) return buddy; return NULL; } extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone); static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone) { if (zone->contiguous) return pfn_to_page(start_pfn); return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); } void set_zone_contiguous(struct zone *zone); bool pfn_range_intersects_zones(int nid, unsigned long start_pfn, unsigned long nr_pages); static inline void clear_zone_contiguous(struct zone *zone) { zone->contiguous = false; } extern int __isolate_free_page(struct page *page, unsigned int order); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order, enum meminit_context context); /* * This will have no effect, other than possibly generating a warning, if the * caller passes in a non-large folio. */ static inline void folio_set_order(struct folio *folio, unsigned int order) { if (WARN_ON_ONCE(!order || !folio_test_large(folio))) return; VM_WARN_ON_ONCE(order > MAX_FOLIO_ORDER); folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order; #ifdef NR_PAGES_IN_LARGE_FOLIO folio->_nr_pages = 1U << order; #endif } bool __folio_unqueue_deferred_split(struct folio *folio); static inline bool folio_unqueue_deferred_split(struct folio *folio) { if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) return false; /* * At this point, there is no one trying to add the folio to * deferred_list. If folio is not in deferred_list, it's safe * to check without acquiring the split_queue_lock. */ if (data_race(list_empty(&folio->_deferred_list))) return false; return __folio_unqueue_deferred_split(folio); } static inline struct folio *page_rmappable_folio(struct page *page) { struct folio *folio = (struct folio *)page; if (folio && folio_test_large(folio)) folio_set_large_rmappable(folio); return folio; } static inline void prep_compound_head(struct page *page, unsigned int order) { struct folio *folio = (struct folio *)page; folio_set_order(folio, order); atomic_set(&folio->_large_mapcount, -1); if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) atomic_set(&folio->_nr_pages_mapped, 0); if (IS_ENABLED(CONFIG_MM_ID)) { folio->_mm_ids = 0; folio->_mm_id_mapcount[0] = -1; folio->_mm_id_mapcount[1] = -1; } if (IS_ENABLED(CONFIG_64BIT) || order > 1) { atomic_set(&folio->_pincount, 0); atomic_set(&folio->_entire_mapcount, -1); } if (order > 1) INIT_LIST_HEAD(&folio->_deferred_list); } static inline void prep_compound_tail(struct page *head, int tail_idx) { struct page *p = head + tail_idx; p->mapping = TAIL_MAPPING; set_compound_head(p, head); set_page_private(p, 0); } void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern bool free_pages_prepare(struct page *page, unsigned int order); extern int user_min_free_kbytes; struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid, nodemask_t *); #define __alloc_frozen_pages(...) 
\ alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__)) void free_frozen_pages(struct page *page, unsigned int order); void free_unref_folios(struct folio_batch *fbatch); #ifdef CONFIG_NUMA struct page *alloc_frozen_pages_noprof(gfp_t, unsigned int order); #else static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order) { return __alloc_frozen_pages_noprof(gfp, order, numa_node_id(), NULL); } #endif #define alloc_frozen_pages(...) \ alloc_hooks(alloc_frozen_pages_noprof(__VA_ARGS__)) struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order); #define alloc_frozen_pages_nolock(...) \ alloc_hooks(alloc_frozen_pages_nolock_noprof(__VA_ARGS__)) extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_disable(struct zone *zone); extern void zone_pcp_enable(struct zone *zone); extern void zone_pcp_init(struct zone *zone); extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, int nid, bool exact_nid); void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int, bool); #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* * in mm/compaction.c */ /* * compact_control is used to track pages being migrated and the free pages * they are being migrated to during memory compaction. The free_pfn starts * at the end of a zone and migrate_pfn begins at the start. Movable pages * are moved to the end of a zone during a compaction run and the run * completes when free_pfn <= migrate_pfn */ struct compact_control { struct list_head freepages[NR_PAGE_ORDERS]; /* List of free pages to migrate to */ struct list_head migratepages; /* List of pages being migrated */ unsigned int nr_freepages; /* Number of isolated free pages */ unsigned int nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ /* * Acts as an in/out parameter to page isolation for migration. * isolate_migratepages uses it as a search base. * isolate_migratepages_block will update the value to the next pfn * after the last isolated one. */ unsigned long migrate_pfn; unsigned long fast_start_pfn; /* a pfn to start linear scan from */ struct zone *zone; unsigned long total_migrate_scanned; unsigned long total_free_scanned; unsigned short fast_search_fail;/* failures to use free list searches */ short search_order; /* order to start a fast search at */ const gfp_t gfp_mask; /* gfp mask of a direct compactor */ int order; /* order a direct compactor needs */ int migratetype; /* migratetype of direct compactor */ const unsigned int alloc_flags; /* alloc flags of a direct compactor */ const int highest_zoneidx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock contention */ bool finish_pageblock; /* Scan the remainder of a pageblock. Used * when there are potentially transient * isolation or migration failures to * ensure forward progress. 
*/ bool alloc_contig; /* alloc_contig_range allocation */ }; /* * Used in direct compaction when a page should be taken from the freelists * immediately when one is created during the free path. */ struct capture_control { struct compact_control *cc; struct page *page; }; unsigned long isolate_freepages_range(struct compact_control *cc, unsigned long start_pfn, unsigned long end_pfn); int isolate_migratepages_range(struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn); /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ struct cma; #ifdef CONFIG_CMA void *cma_reserve_early(struct cma *cma, unsigned long size); void init_cma_pageblock(struct page *page); #else static inline void *cma_reserve_early(struct cma *cma, unsigned long size) { return NULL; } static inline void init_cma_pageblock(struct page *page) { } #endif int find_suitable_fallback(struct free_area *area, unsigned int order, int migratetype, bool claimable); static inline bool free_area_empty(struct free_area *area, int migratetype) { return list_empty(&area->free_list[migratetype]); } /* mm/util.c */ struct anon_vma *folio_anon_vma(const struct folio *folio); #ifdef CONFIG_MMU void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); extern long faultin_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool write, int *locked); bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags, unsigned long bytes); /* * NOTE: This function can't tell whether the folio is "fully mapped" in the * range. * "fully mapped" means all the pages of the folio are associated with the * page table of the range, while this function just checks whether the folio * range is within the range [start, end). The caller needs to do a page table * check if it cares about the page table association. * * Typical usage (like mlock or madvise) is: * The caller knows at least 1 page of the folio is associated with the page * table of the VMA and the range [start, end) intersects the VMA range. The * caller wants to know whether the folio is fully associated with the range. * It calls this function first to check whether the folio is in the range, * then checks the page table to know whether the folio is fully mapped to * the range. */ static inline bool folio_within_range(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end) { pgoff_t pgoff, addr; unsigned long vma_pglen = vma_pages(vma); VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio); if (start > end) return false; if (start < vma->vm_start) start = vma->vm_start; if (end > vma->vm_end) end = vma->vm_end; pgoff = folio_pgoff(folio); /* if folio start address is not in vma range */ if (!in_range(pgoff, vma->vm_pgoff, vma_pglen)) return false; addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); return !(addr < start || end - addr < folio_size(folio)); } static inline bool folio_within_vma(struct folio *folio, struct vm_area_struct *vma) { return folio_within_range(folio, vma, vma->vm_start, vma->vm_end); } /* * mlock_vma_folio() and munlock_vma_folio(): * should be called with vma's mmap_lock held for read or write, * under page table lock for the pte/pmd being added or removed.
* * mlock is usually called at the end of folio_add_*_rmap_*(), munlock at * the end of folio_remove_rmap_*(); but new anon folios are managed by * folio_add_lru_vma() calling mlock_new_folio(). */ void mlock_folio(struct folio *folio); static inline void mlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * The VM_SPECIAL check here serves two purposes. * 1) VM_IO check prevents migration from double-counting during mlock. * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED * is never left set on a VM_SPECIAL vma, there is an interval while * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may * still be set while VM_SPECIAL bits are added: so ignore it then. */ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED)) mlock_folio(folio); } void munlock_folio(struct folio *folio); static inline void munlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * munlock if the function is called. Ideally, we should only * do munlock if any page of folio is unmapped from VMA and * cause folio not fully mapped to VMA. * * But it's not easy to confirm that's the situation. So we * always munlock the folio and page reclaim will correct it * if it's wrong. */ if (unlikely(vma->vm_flags & VM_LOCKED)) munlock_folio(folio); } void mlock_new_folio(struct folio *folio); bool need_mlock_drain(int cpu); void mlock_drain_local(void); void mlock_drain_remote(int cpu); extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /** * vma_address - Find the virtual address a page range is mapped at * @vma: The vma which maps this object. * @pgoff: The page offset within its object. * @nr_pages: The number of pages to consider. * * If any page in this range is mapped by this VMA, return the first address * where any of these pages appear. Otherwise, return -EFAULT. */ static inline unsigned long vma_address(const struct vm_area_struct *vma, pgoff_t pgoff, unsigned long nr_pages) { unsigned long address; if (pgoff >= vma->vm_pgoff) { address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address >= vma->vm_end) address = -EFAULT; } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) { /* Test above avoids possibility of wrap to 0 on 32-bit */ address = vma->vm_start; } else { address = -EFAULT; } return address; } /* * Then at what user virtual address will none of the range be found in vma? * Assumes that vma_address() already returned a good starting address. */ static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; pgoff_t pgoff; unsigned long address; /* Common case, plus ->pgoff is invalid for KSM */ if (pvmw->nr_pages == 1) return pvmw->address + PAGE_SIZE; pgoff = pvmw->pgoff + pvmw->nr_pages; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address > vma->vm_end) address = vma->vm_end; return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin) { int flags = vmf->flags; if (fpin) return fpin; /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_lock if only * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. 
*/ if (fault_flag_allow_retry_first(flags) && !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); release_fault_lock(vmf); } return fpin; } #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } static inline bool need_mlock_drain(int cpu) { return false; } static inline void mlock_drain_local(void) { } static inline void mlock_drain_remote(int cpu) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) { } #endif /* !CONFIG_MMU */ /* Memory initialisation debug and verification */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT DECLARE_STATIC_KEY_TRUE(deferred_pages); bool __init deferred_grow_zone(struct zone *zone, unsigned int order); #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ void init_deferred_page(unsigned long pfn, int nid); enum mminit_level { MMINIT_WARNING, MMINIT_VERIFY, MMINIT_TRACE }; #ifdef CONFIG_DEBUG_MEMORY_INIT extern int mminit_loglevel; #define mminit_dprintk(level, prefix, fmt, arg...) \ do { \ if (level < mminit_loglevel) { \ if (level <= MMINIT_WARNING) \ pr_warn("mminit::" prefix " " fmt, ##arg); \ else \ printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ } \ } while (0) extern void mminit_verify_pageflags_layout(void); extern void mminit_verify_zonelist(void); #else static inline void mminit_dprintk(enum mminit_level level, const char *prefix, const char *fmt, ...) { } static inline void mminit_verify_pageflags_layout(void) { } static inline void mminit_verify_zonelist(void) { } #endif /* CONFIG_DEBUG_MEMORY_INIT */ #define NODE_RECLAIM_NOSCAN -2 #define NODE_RECLAIM_FULL -1 #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else #define node_reclaim_mode 0 static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { return NODE_RECLAIM_NOSCAN; } static inline int find_next_best_node(int node, nodemask_t *used_node_mask) { return NUMA_NO_NODE; } #endif static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? 
*/ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } /* * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); void shake_folio(struct folio *folio); typedef int hwpoison_filter_func_t(struct page *p); void hwpoison_filter_register(hwpoison_filter_func_t *filter); void hwpoison_filter_unregister(void); #define MAGIC_HWPOISON 0x48575053U /* HWPS */ void SetPageHWPoisonTakenOff(struct page *page); void ClearPageHWPoisonTakenOff(struct page *page); bool take_page_off_buddy(struct page *page); bool put_page_back_buddy(struct page *page); struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); void add_to_kill_ksm(struct task_struct *tsk, const struct page *p, struct vm_area_struct *vma, struct list_head *to_kill, unsigned long ksm_addr); unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma); #else static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { return -EBUSY; } #endif extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern void set_pageblock_order(void); unsigned long reclaim_pages(struct list_head *folio_list); unsigned int reclaim_clean_pages_from_list(struct zone *zone, struct list_head *folio_list); /* The ALLOC_WMARK bits are used as an index to zone->watermark */ #define ALLOC_WMARK_MIN WMARK_MIN #define ALLOC_WMARK_LOW WMARK_LOW #define ALLOC_WMARK_HIGH WMARK_HIGH #define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ /* Mask to get the watermark bits */ #define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) /* * Only MMU archs have async oom victim reclaim - aka oom_reaper so we * cannot assume a reduced access to memory reserves is sufficient for * !MMU */ #ifdef CONFIG_MMU #define ALLOC_OOM 0x08 #else #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif #define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access * to 25% of the min watermark or * 62.5% if __GFP_HIGH is set. */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ #ifdef CONFIG_ZONE_DMA32 #define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ #else #define ALLOC_NOFRAGMENT 0x0 #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_TRYLOCK 0x400 /* Only use spin_trylock in allocation path */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. 
*/ #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) enum ttu_flags; struct tlbflush_unmap_batch; /* * only for MM internal work items which do not depend on * any allocations or locks which might depend on allocations */ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { } static inline void try_to_unmap_flush_dirty(void) { } static inline void flush_tlb_batched_pending(struct mm_struct *mm) { } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; void setup_zone_pageset(struct zone *zone); struct migration_target_control { int nid; /* preferred node id */ nodemask_t *nmask; gfp_t gfp_mask; enum migrate_reason reason; }; /* * mm/filemap.c */ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, struct folio *folio, loff_t fpos, size_t size); /* * mm/vmalloc.c */ #ifdef CONFIG_MMU void __init vmalloc_init(void); int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift, gfp_t gfp_mask); unsigned int get_vm_area_page_order(struct vm_struct *vm); #else static inline void vmalloc_init(void) { } static inline int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift, gfp_t gfp_mask) { return -EINVAL; } #endif int __must_check __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); void vunmap_range_noflush(unsigned long start, unsigned long end); void __vunmap_range_noflush(unsigned long start, unsigned long end); static inline bool vma_is_single_threaded_private(struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHARED) return false; return atomic_read(&vma->vm_mm->mm_users) == 1; } #ifdef CONFIG_NUMA_BALANCING bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma, bool is_private_single_threaded); #else static inline bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma, bool is_private_single_threaded) { return false; } #endif int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, unsigned long addr, int *flags, bool writable, int *last_cpupid); void free_zone_device_folio(struct folio *folio); int migrate_device_coherent_folio(struct folio *folio); struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, unsigned long vm_flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller); /* * mm/gup.c */ int __must_check try_grab_folio(struct folio *folio, int refs, unsigned int flags); /* * mm/huge_memory.c */ void touch_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, bool write); bool touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write); /* * Parses a string with mem suffixes into its order. Useful to parse kernel * parameters. 
*/ static inline int get_order_from_str(const char *size_str, unsigned long valid_orders) { unsigned long size; char *endptr; int order; size = memparse(size_str, &endptr); if (!is_power_of_2(size)) return -EINVAL; order = get_order(size); if (BIT(order) & ~valid_orders) return -EINVAL; return order; } enum { /* mark page accessed */ FOLL_TOUCH = 1 << 16, /* a retry, previous pass started an IO */ FOLL_TRIED = 1 << 17, /* we are working on non-current tsk/mm */ FOLL_REMOTE = 1 << 18, /* pages must be released via unpin_user_page */ FOLL_PIN = 1 << 19, /* gup_fast: prevent fall-back to slow gup */ FOLL_FAST_ONLY = 1 << 20, /* allow unlocking the mmap lock */ FOLL_UNLOCKABLE = 1 << 21, /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */ FOLL_MADV_POPULATE = 1 << 22, }; #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \ FOLL_MADV_POPULATE) /* * Indicates for which pages that are write-protected in the page table, * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the * GUP pin will remain consistent with the pages mapped into the page tables * of the MM. * * Temporary unmapping of PageAnonExclusive() pages or clearing of * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq * * GUP-fast and KSM or temporary unmapping (swap, migration): see * folio_try_share_anon_rmap_*() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. * * If the vma is NULL, we're coming from the GUP-fast path and might have * to fallback to the slow path just to lookup the vma. */ static inline bool gup_must_unshare(struct vm_area_struct *vma, unsigned int flags, struct page *page) { /* * FOLL_WRITE is implicitly handled correctly as the page table entry * has to be writable -- and if it references (part of) an anonymous * folio, that part is required to be marked exclusive. */ if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) return false; /* * Note: PageAnon(page) is stable until the page is actually getting * freed. */ if (!PageAnon(page)) { /* * We only care about R/O long-term pining: R/O short-term * pinning does not have the semantics to observe successive * changes through the process page tables. */ if (!(flags & FOLL_LONGTERM)) return false; /* We really need the vma ... */ if (!vma) return true; /* * ... because we only care about writable private ("COW") * mappings where we have to break COW early. */ return is_cow_mapping(vma->vm_flags); } /* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_rmb(); /* * Note that KSM pages cannot be exclusive, and consequently, * cannot get pinned. */ return !PageAnonExclusive(page); } extern bool mirrored_kernelcore; bool memblock_has_mirror(void); void memblock_free_all(void); static __always_inline void vma_set_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff) { vma->vm_start = start; vma->vm_end = end; vma->vm_pgoff = pgoff; } static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) { /* * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty * enablements, because when without soft-dirty being compiled in, * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true. 
*/ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return false; /* * Soft-dirty is kind of special: its tracking is enabled when the * vma flags not set. */ return !(vma->vm_flags & VM_SOFTDIRTY); } static inline bool pmd_needs_soft_dirty_wp(struct vm_area_struct *vma, pmd_t pmd) { return vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd); } static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte) { return vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte); } void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); void __meminit __init_page_from_nid(unsigned long pfn, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t index, void *expected, gfp_t gfp); int shmem_inode_acct_blocks(struct inode *inode, long pages); bool shmem_recalc_inode(struct inode *inode, long alloced, long swapped); #ifdef CONFIG_SHRINKER_DEBUG static inline __printf(2, 0) int shrinker_debugfs_name_alloc( struct shrinker *shrinker, const char *fmt, va_list ap) { shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); return shrinker->name ? 0 : -ENOMEM; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { kfree_const(shrinker->name); shrinker->name = NULL; } extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id); #else /* CONFIG_SHRINKER_DEBUG */ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, const char *fmt, va_list ap) { return 0; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { } static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { *debugfs_id = -1; return NULL; } static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id) { } #endif /* CONFIG_SHRINKER_DEBUG */ /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; #define mapping_set_update(xas, mapping) do { \ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ xas_set_update(xas, workingset_update_node); \ xas_set_lru(xas, &shadow_nodes); \ } \ } while (0) /* mremap.c */ unsigned long move_page_tables(struct pagetable_move_control *pmc); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { } #endif /* CONFIG_UNACCEPTED_MEMORY */ /* pagewalk.c */ int walk_page_range_mm_unsafe(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); int walk_page_range_vma_unsafe(struct vm_area_struct *vma, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); int walk_page_range_debug(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); /* pt_reclaim.c */ bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval); void free_pte(struct mm_struct *mm, unsigned long addr, struct mmu_gather *tlb, pmd_t pmdval); void try_to_free_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long 
addr, struct mmu_gather *tlb); #ifdef CONFIG_PT_RECLAIM bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, struct zap_details *details); #else static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, struct zap_details *details) { return false; } #endif /* CONFIG_PT_RECLAIM */ void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm); int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm); void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn); int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t pgprot); static inline void io_remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long orig_pfn, unsigned long size) { const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size); return remap_pfn_range_prepare(desc, pfn); } static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, unsigned long orig_pfn, unsigned long size, pgprot_t orig_prot) { const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size); const pgprot_t prot = pgprot_decrypted(orig_prot); return remap_pfn_range_complete(vma, addr, pfn, size, prot); } #endif /* __MM_INTERNAL_H */
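/*
 * Illustrative, standalone sketch -- not part of the header above. It
 * demonstrates the address arithmetic that vma_address() and
 * folio_within_range() above both perform: a page at file offset pgoff,
 * inside a VMA that maps file offset vm_pgoff at virtual address vm_start,
 * lives at vm_start + ((pgoff - vm_pgoff) << PAGE_SHIFT). All demo_* names
 * are hypothetical, and PAGE_SHIFT is assumed to be 12 (4 KiB pages).
 */
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12

struct demo_vma {
	unsigned long vm_start;	/* first virtual address of the mapping */
	unsigned long vm_end;	/* first virtual address past the mapping */
	unsigned long vm_pgoff;	/* file offset of vm_start, in pages */
};

/* Returns the mapped address of @pgoff, or -1 if it falls outside the VMA. */
static long demo_vma_address(const struct demo_vma *vma, unsigned long pgoff)
{
	unsigned long address;

	if (pgoff < vma->vm_pgoff)
		return -1;	/* page starts before this VMA (-EFAULT in the kernel) */
	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << DEMO_PAGE_SHIFT);
	if (address >= vma->vm_end)
		return -1;	/* page lies past the end of the VMA */
	return (long)address;
}

int main(void)
{
	/* A VMA mapping file pages 16..31 at 0x7f0000000000. */
	struct demo_vma vma = { 0x7f0000000000UL, 0x7f0000010000UL, 16 };

	/* Page offset 20 is four pages in: prints 0x7f0000004000. */
	printf("pgoff 20 -> 0x%lx\n",
	       (unsigned long)demo_vma_address(&vma, 20));
	return 0;
}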
// SPDX-License-Identifier: GPL-2.0 /* * Creating audit events from TTY input. * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * * Authors: Miloslav Trmac <mitr@redhat.com> */ #include <linux/audit.h> #include <linux/slab.h> #include <linux/tty.h> #include "tty.h" #define TTY_AUDIT_BUF_SIZE 4096 struct tty_audit_buf { struct mutex mutex; /* Protects all data below */ dev_t dev; /* The TTY which the data is from */ bool icanon; size_t valid; u8 *data; /* Allocated size TTY_AUDIT_BUF_SIZE */ }; static struct tty_audit_buf *tty_audit_buf_ref(void) { struct tty_audit_buf *buf; buf = current->signal->tty_audit_buf; WARN_ON(buf == ERR_PTR(-ESRCH)); return buf; } static struct tty_audit_buf *tty_audit_buf_alloc(void) { struct tty_audit_buf *buf; buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) goto err; buf->data = kmalloc(TTY_AUDIT_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; mutex_init(&buf->mutex); return buf; err_buf: kfree(buf); err: return NULL; } static void tty_audit_buf_free(struct tty_audit_buf *buf) { WARN_ON(buf->valid != 0); kfree(buf->data); kfree(buf); } static void tty_audit_log(const char *description, dev_t dev, const u8 *data, size_t size) { struct audit_buffer *ab; pid_t pid = task_pid_nr(current); uid_t uid = from_kuid(&init_user_ns, task_uid(current)); uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current)); unsigned int sessionid = audit_get_sessionid(current); char name[TASK_COMM_LEN]; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_TTY); if (!ab) return; audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u major=%d minor=%d comm=", description, pid, uid, loginuid, sessionid, MAJOR(dev), MINOR(dev)); get_task_comm(name, current); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); audit_log_n_hex(ab, data, size); audit_log_end(ab); } /* * tty_audit_buf_push - Push buffered data out * * Generate an audit message from the contents of @buf, which is owned by * the current task. @buf->mutex must be locked. */ static void tty_audit_buf_push(struct tty_audit_buf *buf) { if (buf->valid == 0) return; if (audit_enabled == AUDIT_OFF) { buf->valid = 0; return; } tty_audit_log("tty", buf->dev, buf->data, buf->valid); buf->valid = 0; } /** * tty_audit_exit - Handle a task exit * * Make sure all buffered data is written out and deallocate the buffer. * Only needs to be called if current->signal->tty_audit_buf != %NULL. * * The process is single-threaded at this point; no other threads share * current->signal.
*/ void tty_audit_exit(void) { struct tty_audit_buf *buf; buf = xchg(&current->signal->tty_audit_buf, ERR_PTR(-ESRCH)); if (!buf) return; tty_audit_buf_push(buf); tty_audit_buf_free(buf); } /* * tty_audit_fork - Copy TTY audit state for a new task * * Set up TTY audit state in @sig from current. @sig needs no locking. */ void tty_audit_fork(struct signal_struct *sig) { sig->audit_tty = current->signal->audit_tty; } /* * tty_audit_tiocsti - Log TIOCSTI */ void tty_audit_tiocsti(const struct tty_struct *tty, u8 ch) { dev_t dev; dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (tty_audit_push()) return; if (audit_enabled) tty_audit_log("ioctl=TIOCSTI", dev, &ch, 1); } /* * tty_audit_push - Flush current's pending audit data * * Returns 0 if success, -EPERM if tty audit is disabled */ int tty_audit_push(void) { struct tty_audit_buf *buf; if (~current->signal->audit_tty & AUDIT_TTY_ENABLE) return -EPERM; buf = tty_audit_buf_ref(); if (!IS_ERR_OR_NULL(buf)) { mutex_lock(&buf->mutex); tty_audit_buf_push(buf); mutex_unlock(&buf->mutex); } return 0; } /* * tty_audit_buf_get - Get an audit buffer. * * Get an audit buffer, allocate it if necessary. Return %NULL * if out of memory or ERR_PTR(-ESRCH) if tty_audit_exit() has already * occurred. Otherwise, return a new reference to the buffer. */ static struct tty_audit_buf *tty_audit_buf_get(void) { struct tty_audit_buf *buf; buf = tty_audit_buf_ref(); if (buf) return buf; buf = tty_audit_buf_alloc(); if (buf == NULL) { audit_log_lost("out of memory in TTY auditing"); return NULL; } /* Race to use this buffer, free it if another wins */ if (cmpxchg(&current->signal->tty_audit_buf, NULL, buf) != NULL) tty_audit_buf_free(buf); return tty_audit_buf_ref(); } /* * tty_audit_add_data - Add data for TTY auditing. * * Audit @data of @size from @tty, if necessary. */ void tty_audit_add_data(const struct tty_struct *tty, const void *data, size_t size) { struct tty_audit_buf *buf; unsigned int audit_tty; bool icanon = L_ICANON(tty); dev_t dev; audit_tty = READ_ONCE(current->signal->audit_tty); if (~audit_tty & AUDIT_TTY_ENABLE) return; if (unlikely(size == 0)) return; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) return; if ((~audit_tty & AUDIT_TTY_LOG_PASSWD) && icanon && !L_ECHO(tty)) return; buf = tty_audit_buf_get(); if (IS_ERR_OR_NULL(buf)) return; mutex_lock(&buf->mutex); dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (buf->dev != dev || buf->icanon != icanon) { tty_audit_buf_push(buf); buf->dev = dev; buf->icanon = icanon; } do { size_t run; run = TTY_AUDIT_BUF_SIZE - buf->valid; if (run > size) run = size; memcpy(buf->data + buf->valid, data, run); buf->valid += run; data += run; size -= run; if (buf->valid == TTY_AUDIT_BUF_SIZE) tty_audit_buf_push(buf); } while (size != 0); mutex_unlock(&buf->mutex); }
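/*
 * Illustrative, standalone sketch -- not kernel code. It shows the chunked
 * copy-and-flush pattern that tty_audit_add_data() above uses: incoming
 * bytes are copied into a fixed-size buffer in runs, and whenever the
 * buffer fills it is "pushed" (here: printed) and reused. All demo_* names
 * and the tiny 8-byte buffer are hypothetical choices for the demo.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_BUF_SIZE 8

static char demo_buf[DEMO_BUF_SIZE];
static size_t demo_valid;	/* bytes currently buffered */

static void demo_push(void)
{
	if (demo_valid == 0)
		return;
	printf("push %zu bytes: %.*s\n", demo_valid, (int)demo_valid, demo_buf);
	demo_valid = 0;
}

static void demo_add_data(const char *data, size_t size)
{
	/* Same run-sized copy loop as tty_audit_add_data(). */
	while (size != 0) {
		size_t run = DEMO_BUF_SIZE - demo_valid;

		if (run > size)
			run = size;
		memcpy(demo_buf + demo_valid, data, run);
		demo_valid += run;
		data += run;
		size -= run;
		if (demo_valid == DEMO_BUF_SIZE)
			demo_push();
	}
}

int main(void)
{
	const char *msg = "hello, tty audit world";

	demo_add_data(msg, strlen(msg));
	demo_push();	/* flush the tail; the kernel flushes on exit/push instead */
	return 0;
}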
// SPDX-License-Identifier: GPL-2.0-or-later /* * Virtio PCI driver - common functionality for all device versions * * This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen. * * Copyright IBM Corp. 2007 * Copyright Red Hat, Inc. 2014 * * Authors: * Anthony Liguori <aliguori@us.ibm.com> * Rusty Russell <rusty@rustcorp.com.au> * Michael S. Tsirkin <mst@redhat.com> */ #include "virtio_pci_common.h" static bool force_legacy = false; #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) module_param(force_legacy, bool, 0444); MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return false; return index == vp_dev->admin_vq.vq_index; } /* wait for pending irq handlers */ void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq) { /* we write the queue's selector into the notification register to * signal the other end */ iowrite16(vq->index, (void __iomem *)vq->priv); return true; } /* Notify all slow path virtqueues on an interrupt. */ static void vp_vring_slow_path_interrupt(int irq, struct virtio_pci_device *vp_dev) { struct virtio_pci_vq_info *info; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->slow_virtqueues, node) vring_interrupt(irq, info->vq); spin_unlock_irqrestore(&vp_dev->lock, flags); } /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } /* Notify all virtqueues on an interrupt. */ static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->virtqueues, node) { if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } /* A small wrapper to also acknowledge the interrupt when it's handled. * I really need an EIO hook for the vring so I can ack the interrupt once we * know that we'll be handling the IRQ but before we invoke the callback since * the callback may notify the host which results in the host attempting to * raise an interrupt that we would then mask once we acknowledged the * interrupt. */ static irqreturn_t vp_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; u8 isr; /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); /* It's definitely not us if the ISR was not high */ if (!isr) return IRQ_NONE; /* Configuration change? Tell driver if it wants to know. 
*/ if (isr & VIRTIO_PCI_ISR_CONFIG) vp_config_changed(irq, opaque); return vp_vring_interrupt(irq, opaque); } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, bool per_vq_vectors, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned int flags = PCI_IRQ_MSIX; unsigned int i, v; int err = -ENOMEM; vp_dev->msix_vectors = nvectors; vp_dev->msix_names = kmalloc_array(nvectors, sizeof(*vp_dev->msix_names), GFP_KERNEL); if (!vp_dev->msix_names) goto error; vp_dev->msix_affinity_masks = kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); if (!vp_dev->msix_affinity_masks) goto error; for (i = 0; i < nvectors; ++i) if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], GFP_KERNEL)) goto error; if (!per_vq_vectors) desc = NULL; if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ } err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; /* Set the vector used for configuration */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; v = vp_dev->config_vector(vp_dev, v); /* Verify we had enough resources to assign the vector */ if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto error; } if (!per_vq_vectors) { /* Shared vector for all VQs */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; } return 0; error: return err; } static bool vp_is_slow_path_vector(u16 msix_vec) { return msix_vec == VP_MSIX_CONFIG_VECTOR; } static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, u16 msix_vec, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); struct virtqueue *vq; unsigned long flags; /* fill out our structure that represents an active queue */ if (!info) return ERR_PTR(-ENOMEM); vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); if (!vp_is_slow_path_vector(msix_vec)) list_add(&info->node, &vp_dev->virtqueues); else list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } *p_info = info; return vq; out_info: kfree(info); return vq; } static void vp_del_vq(struct virtqueue *vq, struct virtio_pci_vq_info *info) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); unsigned long flags; /* * If it fails during re-enable reset vq. This way we won't rejoin * info->node to the queue. Prevent unexpected irqs. 
*/ if (!vq->reset) { spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); } vp_dev->del_vq(info); kfree(info); } /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq, *n; int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vp_is_avq(vdev, vq->index) ? vp_dev->admin_vq.info : vp_dev->vqs[vq->index]; if (vp_dev->per_vq_vectors) { int v = info->msix_vector; if (v != VIRTIO_MSI_NO_VECTOR && !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } vp_del_vq(vq, info); } vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); vp_dev->intx_enabled = 0; } for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); pci_free_irq_vectors(vp_dev->pci_dev); vp_dev->msix_enabled = 0; } vp_dev->msix_vectors = 0; vp_dev->msix_used_vectors = 0; kfree(vp_dev->msix_names); vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; kfree(vp_dev->vqs); vp_dev->vqs = NULL; } enum vp_vq_vector_policy { VP_VQ_VECTOR_POLICY_EACH, VP_VQ_VECTOR_POLICY_SHARED_SLOW, VP_VQ_VECTOR_POLICY_SHARED, }; static struct virtqueue * vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, vq_callback_t *callback, const char *name, bool ctx, bool slow_path, int *allocated_vectors, enum vp_vq_vector_policy vector_policy, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; u16 msix_vec; int err; if (!callback) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && !slow_path)) msix_vec = (*allocated_vectors)++; else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && slow_path) msix_vec = VP_MSIX_CONFIG_VECTOR; else msix_vec = VP_MSIX_VQ_VECTOR; vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, p_info); if (IS_ERR(vq)) return vq; if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || msix_vec == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(msix_vec)) return vq; /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, 0, vp_dev->msix_names[msix_vec], vq); if (err) { vp_del_vq(vq, *p_info); return ERR_PTR(err); } return vq; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; int i, err, nvectors, allocated_vectors, queue_idx = 0; struct virtqueue *vq; bool per_vq_vectors; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, 
&avq->vq_index, &avq_num); if (err) goto error_find; } per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (vqi->name && vqi->callback) ++nvectors; } if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, false, &allocated_vectors, vector_policy, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, avq->name, false, true, &allocated_vectors, vector_policy, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto error_find; } return 0; error_find: vp_del_vqs(vdev); return err; } static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; struct virtqueue *vq; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto out_del_vqs; } err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { struct virtqueue_info *vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, false, VIRTIO_MSI_NO_VECTOR, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto out_del_vqs; } return 0; out_del_vqs: vp_del_vqs(vdev); return err; } /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_EACH, desc); if (!err) return 0; /* Fallback: MSI-X with one shared vector for config and * slow path queues, one vector per queue for the rest. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ if (!(to_vp_device(vdev)->pci_dev->irq)) return err; /* Finally fall back to regular interrupts. 
*/ return vp_find_vqs_intx(vdev, nvqs, vqs, vqs_info); } const char *vp_bus_name(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); return pci_name(vp_dev->pci_dev); } /* Setup the affinity for a virtqueue: * - force the affinity for per vq vector * - OR over all affinities for shared MSI * - ignore the affinity request if we're using INTX */ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct cpumask *mask; unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); irq_set_affinity_and_hint(irq, mask); } } return 0; } const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, vp_dev->vqs[index]->msix_vector); } #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = virtio_device_freeze(&vp_dev->vdev); if (!ret) pci_disable_device(pci_dev); return ret; } static int virtio_pci_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = pci_enable_device(pci_dev); if (ret) return ret; pci_set_master(pci_dev); return virtio_device_restore(&vp_dev->vdev); } static bool vp_supports_pm_no_reset(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); u16 pmcsr; if (!pci_dev->pm_cap) return false; pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); if (PCI_POSSIBLE_ERROR(pmcsr)) { dev_err(dev, "Unable to query pmcsr"); return false; } return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; } static int virtio_pci_suspend(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, .freeze = virtio_pci_freeze, .thaw = virtio_pci_restore, .poweroff = virtio_pci_freeze, .restore = virtio_pci_restore, }; #endif /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static const struct pci_device_id virtio_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); static void virtio_pci_release_dev(struct device *_d) { struct virtio_device *vdev = dev_to_virtio(_d); struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* As struct device is a kobject, it's not safe to * free the memory (including the reference counter itself) * until it's release callback. 
*/ kfree(vp_dev); } static int virtio_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct virtio_pci_device *vp_dev, *reg_dev = NULL; int rc; /* allocate our structure and fill it out */ vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); if (!vp_dev) return -ENOMEM; pci_set_drvdata(pci_dev, vp_dev); vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); if (rc) goto err_enable_device; if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ if (rc == -ENODEV || rc == -ENOMEM) rc = virtio_pci_modern_probe(vp_dev); if (rc) goto err_probe; } else { rc = virtio_pci_modern_probe(vp_dev); if (rc == -ENODEV) rc = virtio_pci_legacy_probe(vp_dev); if (rc) goto err_probe; } pci_set_master(pci_dev); rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) goto err_register; return 0; err_register: if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: if (reg_dev) put_device(&vp_dev->vdev.dev); else kfree(vp_dev); return rc; } static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); /* * Device is marked broken on surprise removal so that virtio upper * layers can abort any ongoing operation. */ if (!pci_device_is_present(pci_dev)) virtio_break_device(&vp_dev->vdev); pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); put_device(dev); } static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct virtio_device *vdev = &vp_dev->vdev; int ret; if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) return -EBUSY; if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV)) return -EINVAL; if (pci_vfs_assigned(pci_dev)) return -EPERM; if (num_vfs == 0) { pci_disable_sriov(pci_dev); return 0; } ret = pci_enable_sriov(pci_dev, num_vfs); if (ret < 0) return ret; return num_vfs; } static void virtio_pci_reset_prepare(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret = 0; ret = virtio_device_reset_prepare(&vp_dev->vdev); if (ret) { if (ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset prepare failure: %d", ret); return; } if (pci_is_enabled(pci_dev)) pci_disable_device(pci_dev); } static void virtio_pci_reset_done(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; if (pci_is_enabled(pci_dev)) return; ret = pci_enable_device(pci_dev); if (!ret) { pci_set_master(pci_dev); ret = virtio_device_reset_done(&vp_dev->vdev); } if (ret && ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset done failure: %d", ret); } static const struct pci_error_handlers virtio_pci_err_handler = { .reset_prepare = virtio_pci_reset_prepare, .reset_done = virtio_pci_reset_done, }; static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = virtio_pci_remove, #ifdef CONFIG_PM_SLEEP .driver.pm = 
&virtio_pci_pm_ops, #endif .sriov_configure = virtio_pci_sriov_configure, .err_handler = &virtio_pci_err_handler, }; struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev) { struct virtio_pci_device *pf_vp_dev; pf_vp_dev = pci_iov_get_pf_drvdata(pdev, &virtio_pci_driver); if (IS_ERR(pf_vp_dev)) return NULL; return &pf_vp_dev->vdev; } module_pci_driver(virtio_pci_driver); MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); MODULE_DESCRIPTION("virtio-pci"); MODULE_LICENSE("GPL"); MODULE_VERSION("1");
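/*
 * Illustrative, standalone sketch -- not driver code. vp_find_vqs() above
 * degrades through interrupt layouts in order of preference: MSI-X with one
 * vector per queue, then a shared vector for config and slow-path queues,
 * then one vector shared by all queues, and finally legacy INTx. The
 * pattern is simply "try each strategy until one succeeds". All demo_*
 * names are hypothetical and the failures below are simulated.
 */
#include <stdio.h>
#include <errno.h>

typedef int (*demo_strategy_fn)(void);

static int demo_msix_per_vq(void)      { return -ENOSPC; } /* pretend: too few vectors */
static int demo_msix_shared_slow(void) { return -ENOSPC; } /* pretend: still too few */
static int demo_msix_shared(void)      { return -ENOSPC; }
static int demo_intx(void)             { return 0; }       /* pretend: plain INTx works */

int main(void)
{
	demo_strategy_fn strategies[] = {
		demo_msix_per_vq,
		demo_msix_shared_slow,
		demo_msix_shared,
		demo_intx,
	};
	size_t i;

	/* Stop at the first strategy that sets up interrupts successfully. */
	for (i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
		if (strategies[i]() == 0) {
			printf("strategy %zu succeeded\n", i);
			return 0;
		}
	}
	fprintf(stderr, "no interrupt strategy worked\n");
	return 1;
}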
// SPDX-License-Identifier: GPL-2.0-or-later /* * cpia CPiA (1) gspca driver * * Copyright (C) 2010-2011 Hans de Goede <hdegoede@redhat.com> * * This module is adapted from the in-kernel v4l1 cpia driver, which is: * * (C) Copyright 1999-2000 Peter Pregler * (C) Copyright 1999-2000 Scott J. Bertin * (C) Copyright 1999-2000 Johannes Erdfelt <johannes@erdfelt.com> * (C) Copyright 2000 STMicroelectronics */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "cpia1" #include <linux/input.h> #include <linux/sched/signal.h> #include <linux/bitops.h> #include "gspca.h" MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("Vision CPiA"); MODULE_LICENSE("GPL"); /* constant values */ #define MAGIC_0 0x19 #define MAGIC_1 0x68 #define DATA_IN 0xc0 #define DATA_OUT 0x40 #define VIDEOSIZE_QCIF 0 /* 176x144 */ #define VIDEOSIZE_CIF 1 /* 352x288 */ #define SUBSAMPLE_420 0 #define SUBSAMPLE_422 1 #define YUVORDER_YUYV 0 #define YUVORDER_UYVY 1 #define NOT_COMPRESSED 0 #define COMPRESSED 1 #define NO_DECIMATION 0 #define DECIMATION_ENAB 1 #define EOI 0xff /* End Of Image */ #define EOL 0xfd /* End Of Line */ #define FRAME_HEADER_SIZE 64 /* Image grab modes */ #define CPIA_GRAB_SINGLE 0 #define CPIA_GRAB_CONTINEOUS 1 /* Compression parameters */ #define CPIA_COMPRESSION_NONE 0 #define CPIA_COMPRESSION_AUTO 1 #define CPIA_COMPRESSION_MANUAL 2 #define CPIA_COMPRESSION_TARGET_QUALITY 0 #define CPIA_COMPRESSION_TARGET_FRAMERATE 1 /* Return offsets for GetCameraState */ #define SYSTEMSTATE 0 #define GRABSTATE 1 #define STREAMSTATE 2 #define FATALERROR 3 #define CMDERROR 4 #define DEBUGFLAGS 5 #define VPSTATUS 6 #define ERRORCODE 7 /* SystemState */ #define UNINITIALISED_STATE 0 #define PASS_THROUGH_STATE 1 #define LO_POWER_STATE 2 #define HI_POWER_STATE 3 #define WARM_BOOT_STATE 4 /* GrabState */ #define GRAB_IDLE 0 #define GRAB_ACTIVE 1 #define GRAB_DONE 2 /* StreamState */ #define STREAM_NOT_READY 0 #define STREAM_READY 1 #define STREAM_OPEN 2 #define STREAM_PAUSED 3 #define STREAM_FINISHED 4 /* Fatal Error, CmdError, and DebugFlags */ #define CPIA_FLAG 1 #define SYSTEM_FLAG 2 #define INT_CTRL_FLAG 4 #define PROCESS_FLAG 8 #define COM_FLAG 16 #define VP_CTRL_FLAG 32 
#define CAPTURE_FLAG 64 #define DEBUG_FLAG 128 /* VPStatus */ #define VP_STATE_OK 0x00 #define VP_STATE_FAILED_VIDEOINIT 0x01 #define VP_STATE_FAILED_AECACBINIT 0x02 #define VP_STATE_AEC_MAX 0x04 #define VP_STATE_ACB_BMAX 0x08 #define VP_STATE_ACB_RMIN 0x10 #define VP_STATE_ACB_GMIN 0x20 #define VP_STATE_ACB_RMAX 0x40 #define VP_STATE_ACB_GMAX 0x80 /* default (minimum) compensation values */ #define COMP_RED 220 #define COMP_GREEN1 214 #define COMP_GREEN2 COMP_GREEN1 #define COMP_BLUE 230 /* exposure status */ #define EXPOSURE_VERY_LIGHT 0 #define EXPOSURE_LIGHT 1 #define EXPOSURE_NORMAL 2 #define EXPOSURE_DARK 3 #define EXPOSURE_VERY_DARK 4 #define CPIA_MODULE_CPIA (0 << 5) #define CPIA_MODULE_SYSTEM (1 << 5) #define CPIA_MODULE_VP_CTRL (5 << 5) #define CPIA_MODULE_CAPTURE (6 << 5) #define CPIA_MODULE_DEBUG (7 << 5) #define INPUT (DATA_IN << 8) #define OUTPUT (DATA_OUT << 8) #define CPIA_COMMAND_GetCPIAVersion (INPUT | CPIA_MODULE_CPIA | 1) #define CPIA_COMMAND_GetPnPID (INPUT | CPIA_MODULE_CPIA | 2) #define CPIA_COMMAND_GetCameraStatus (INPUT | CPIA_MODULE_CPIA | 3) #define CPIA_COMMAND_GotoHiPower (OUTPUT | CPIA_MODULE_CPIA | 4) #define CPIA_COMMAND_GotoLoPower (OUTPUT | CPIA_MODULE_CPIA | 5) #define CPIA_COMMAND_GotoSuspend (OUTPUT | CPIA_MODULE_CPIA | 7) #define CPIA_COMMAND_GotoPassThrough (OUTPUT | CPIA_MODULE_CPIA | 8) #define CPIA_COMMAND_ModifyCameraStatus (OUTPUT | CPIA_MODULE_CPIA | 10) #define CPIA_COMMAND_ReadVCRegs (INPUT | CPIA_MODULE_SYSTEM | 1) #define CPIA_COMMAND_WriteVCReg (OUTPUT | CPIA_MODULE_SYSTEM | 2) #define CPIA_COMMAND_ReadMCPorts (INPUT | CPIA_MODULE_SYSTEM | 3) #define CPIA_COMMAND_WriteMCPort (OUTPUT | CPIA_MODULE_SYSTEM | 4) #define CPIA_COMMAND_SetBaudRate (OUTPUT | CPIA_MODULE_SYSTEM | 5) #define CPIA_COMMAND_SetECPTiming (OUTPUT | CPIA_MODULE_SYSTEM | 6) #define CPIA_COMMAND_ReadIDATA (INPUT | CPIA_MODULE_SYSTEM | 7) #define CPIA_COMMAND_WriteIDATA (OUTPUT | CPIA_MODULE_SYSTEM | 8) #define CPIA_COMMAND_GenericCall (OUTPUT | CPIA_MODULE_SYSTEM | 9) #define CPIA_COMMAND_I2CStart (OUTPUT | CPIA_MODULE_SYSTEM | 10) #define CPIA_COMMAND_I2CStop (OUTPUT | CPIA_MODULE_SYSTEM | 11) #define CPIA_COMMAND_I2CWrite (OUTPUT | CPIA_MODULE_SYSTEM | 12) #define CPIA_COMMAND_I2CRead (INPUT | CPIA_MODULE_SYSTEM | 13) #define CPIA_COMMAND_GetVPVersion (INPUT | CPIA_MODULE_VP_CTRL | 1) #define CPIA_COMMAND_ResetFrameCounter (INPUT | CPIA_MODULE_VP_CTRL | 2) #define CPIA_COMMAND_SetColourParams (OUTPUT | CPIA_MODULE_VP_CTRL | 3) #define CPIA_COMMAND_SetExposure (OUTPUT | CPIA_MODULE_VP_CTRL | 4) #define CPIA_COMMAND_SetColourBalance (OUTPUT | CPIA_MODULE_VP_CTRL | 6) #define CPIA_COMMAND_SetSensorFPS (OUTPUT | CPIA_MODULE_VP_CTRL | 7) #define CPIA_COMMAND_SetVPDefaults (OUTPUT | CPIA_MODULE_VP_CTRL | 8) #define CPIA_COMMAND_SetApcor (OUTPUT | CPIA_MODULE_VP_CTRL | 9) #define CPIA_COMMAND_SetFlickerCtrl (OUTPUT | CPIA_MODULE_VP_CTRL | 10) #define CPIA_COMMAND_SetVLOffset (OUTPUT | CPIA_MODULE_VP_CTRL | 11) #define CPIA_COMMAND_GetColourParams (INPUT | CPIA_MODULE_VP_CTRL | 16) #define CPIA_COMMAND_GetColourBalance (INPUT | CPIA_MODULE_VP_CTRL | 17) #define CPIA_COMMAND_GetExposure (INPUT | CPIA_MODULE_VP_CTRL | 18) #define CPIA_COMMAND_SetSensorMatrix (OUTPUT | CPIA_MODULE_VP_CTRL | 19) #define CPIA_COMMAND_ColourBars (OUTPUT | CPIA_MODULE_VP_CTRL | 25) #define CPIA_COMMAND_ReadVPRegs (INPUT | CPIA_MODULE_VP_CTRL | 30) #define CPIA_COMMAND_WriteVPReg (OUTPUT | CPIA_MODULE_VP_CTRL | 31) #define CPIA_COMMAND_GrabFrame (OUTPUT | CPIA_MODULE_CAPTURE | 1) #define 
CPIA_COMMAND_UploadFrame (OUTPUT | CPIA_MODULE_CAPTURE | 2) #define CPIA_COMMAND_SetGrabMode (OUTPUT | CPIA_MODULE_CAPTURE | 3) #define CPIA_COMMAND_InitStreamCap (OUTPUT | CPIA_MODULE_CAPTURE | 4) #define CPIA_COMMAND_FiniStreamCap (OUTPUT | CPIA_MODULE_CAPTURE | 5) #define CPIA_COMMAND_StartStreamCap (OUTPUT | CPIA_MODULE_CAPTURE | 6) #define CPIA_COMMAND_EndStreamCap (OUTPUT | CPIA_MODULE_CAPTURE | 7) #define CPIA_COMMAND_SetFormat (OUTPUT | CPIA_MODULE_CAPTURE | 8) #define CPIA_COMMAND_SetROI (OUTPUT | CPIA_MODULE_CAPTURE | 9) #define CPIA_COMMAND_SetCompression (OUTPUT | CPIA_MODULE_CAPTURE | 10) #define CPIA_COMMAND_SetCompressionTarget (OUTPUT | CPIA_MODULE_CAPTURE | 11) #define CPIA_COMMAND_SetYUVThresh (OUTPUT | CPIA_MODULE_CAPTURE | 12) #define CPIA_COMMAND_SetCompressionParams (OUTPUT | CPIA_MODULE_CAPTURE | 13) #define CPIA_COMMAND_DiscardFrame (OUTPUT | CPIA_MODULE_CAPTURE | 14) #define CPIA_COMMAND_GrabReset (OUTPUT | CPIA_MODULE_CAPTURE | 15) #define CPIA_COMMAND_OutputRS232 (OUTPUT | CPIA_MODULE_DEBUG | 1) #define CPIA_COMMAND_AbortProcess (OUTPUT | CPIA_MODULE_DEBUG | 4) #define CPIA_COMMAND_SetDramPage (OUTPUT | CPIA_MODULE_DEBUG | 5) #define CPIA_COMMAND_StartDramUpload (OUTPUT | CPIA_MODULE_DEBUG | 6) #define CPIA_COMMAND_StartDummyDtream (OUTPUT | CPIA_MODULE_DEBUG | 8) #define CPIA_COMMAND_AbortStream (OUTPUT | CPIA_MODULE_DEBUG | 9) #define CPIA_COMMAND_DownloadDRAM (OUTPUT | CPIA_MODULE_DEBUG | 10) #define CPIA_COMMAND_Null (OUTPUT | CPIA_MODULE_DEBUG | 11) #define ROUND_UP_EXP_FOR_FLICKER 15 /* Constants for automatic frame rate adjustment */ #define MAX_EXP 302 #define MAX_EXP_102 255 #define LOW_EXP 140 #define VERY_LOW_EXP 70 #define TC 94 #define EXP_ACC_DARK 50 #define EXP_ACC_LIGHT 90 #define HIGH_COMP_102 160 #define MAX_COMP 239 #define DARK_TIME 3 #define LIGHT_TIME 3 #define FIRMWARE_VERSION(x, y) (sd->params.version.firmwareVersion == (x) && \ sd->params.version.firmwareRevision == (y)) #define CPIA1_CID_COMP_TARGET (V4L2_CTRL_CLASS_USER + 0x1000) #define BRIGHTNESS_DEF 50 #define CONTRAST_DEF 48 #define SATURATION_DEF 50 #define FREQ_DEF V4L2_CID_POWER_LINE_FREQUENCY_50HZ #define ILLUMINATORS_1_DEF 0 #define ILLUMINATORS_2_DEF 0 #define COMP_TARGET_DEF CPIA_COMPRESSION_TARGET_QUALITY /* Developer's Guide Table 5 p 3-34 * indexed by [mains][sensorFps.baserate][sensorFps.divisor]*/ static u8 flicker_jumps[2][2][4] = { { { 76, 38, 19, 9 }, { 92, 46, 23, 11 } }, { { 64, 32, 16, 8 }, { 76, 38, 19, 9} } }; struct cam_params { struct { u8 firmwareVersion; u8 firmwareRevision; u8 vcVersion; u8 vcRevision; } version; struct { u16 vendor; u16 product; u16 deviceRevision; } pnpID; struct { u8 vpVersion; u8 vpRevision; u16 cameraHeadID; } vpVersion; struct { u8 systemState; u8 grabState; u8 streamState; u8 fatalError; u8 cmdError; u8 debugFlags; u8 vpStatus; u8 errorCode; } status; struct { u8 brightness; u8 contrast; u8 saturation; } colourParams; struct { u8 gainMode; u8 expMode; u8 compMode; u8 centreWeight; u8 gain; u8 fineExp; u8 coarseExpLo; u8 coarseExpHi; u8 redComp; u8 green1Comp; u8 green2Comp; u8 blueComp; } exposure; struct { u8 balanceMode; u8 redGain; u8 greenGain; u8 blueGain; } colourBalance; struct { u8 divisor; u8 baserate; } sensorFps; struct { u8 gain1; u8 gain2; u8 gain4; u8 gain8; } apcor; struct { u8 disabled; u8 flickerMode; u8 coarseJump; u8 allowableOverExposure; } flickerControl; struct { u8 gain1; u8 gain2; u8 gain4; u8 gain8; } vlOffset; struct { u8 mode; u8 decimation; } compression; struct { u8 frTargeting; u8 targetFR; u8 targetQ; } 
compressionTarget; struct { u8 yThreshold; u8 uvThreshold; } yuvThreshold; struct { u8 hysteresis; u8 threshMax; u8 smallStep; u8 largeStep; u8 decimationHysteresis; u8 frDiffStepThresh; u8 qDiffStepThresh; u8 decimationThreshMod; } compressionParams; struct { u8 videoSize; /* CIF/QCIF */ u8 subSample; u8 yuvOrder; } format; struct { /* Intel QX3 specific data */ u8 qx3_detected; /* a QX3 is present */ u8 toplight; /* top light lit , R/W */ u8 bottomlight; /* bottom light lit, R/W */ u8 button; /* snapshot button pressed (R/O) */ u8 cradled; /* microscope is in cradle (R/O) */ } qx3; struct { u8 colStart; /* skip first 8*colStart pixels */ u8 colEnd; /* finish at 8*colEnd pixels */ u8 rowStart; /* skip first 4*rowStart lines */ u8 rowEnd; /* finish at 4*rowEnd lines */ } roi; u8 ecpTiming; u8 streamStartLine; }; /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct cam_params params; /* camera settings */ atomic_t cam_exposure; atomic_t fps; int exposure_count; u8 exposure_status; struct v4l2_ctrl *freq; u8 mainsFreq; /* 0 = 50hz, 1 = 60hz */ u8 first_frame; }; static const struct v4l2_pix_format mode[] = { {160, 120, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE, /* The sizeimage is trial and error, as with low framerates * the camera will pad out usb frames, making the image * data larger than strictly necessary */ .bytesperline = 160, .sizeimage = 65536, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE, .bytesperline = 172, .sizeimage = 65536, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 262144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 262144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /********************************************************************** * * General functions * **********************************************************************/ static int cpia_usb_transferCmd(struct gspca_dev *gspca_dev, u8 *command) { u8 requesttype; unsigned int pipe; int ret, databytes = command[6] | (command[7] << 8); /* Sometimes we see spurious EPIPE errors */ int retries = 3; if (command[0] == DATA_IN) { pipe = usb_rcvctrlpipe(gspca_dev->dev, 0); requesttype = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE; } else if (command[0] == DATA_OUT) { pipe = usb_sndctrlpipe(gspca_dev->dev, 0); requesttype = USB_TYPE_VENDOR | USB_RECIP_DEVICE; } else { gspca_err(gspca_dev, "Unexpected first byte of command: %x\n", command[0]); return -EINVAL; } retry: ret = usb_control_msg(gspca_dev->dev, pipe, command[1], requesttype, command[2] | (command[3] << 8), command[4] | (command[5] << 8), gspca_dev->usb_buf, databytes, 1000); if (ret < 0) pr_err("usb_control_msg %02x, error %d\n", command[1], ret); if (ret == -EPIPE && retries > 0) { retries--; goto retry; } return (ret < 0) ? 
ret : 0; } /* send an arbitrary command to the camera */ static int do_command(struct gspca_dev *gspca_dev, u16 command, u8 a, u8 b, u8 c, u8 d) { struct sd *sd = (struct sd *) gspca_dev; int ret, datasize; u8 cmd[8]; switch (command) { case CPIA_COMMAND_GetCPIAVersion: case CPIA_COMMAND_GetPnPID: case CPIA_COMMAND_GetCameraStatus: case CPIA_COMMAND_GetVPVersion: case CPIA_COMMAND_GetColourParams: case CPIA_COMMAND_GetColourBalance: case CPIA_COMMAND_GetExposure: datasize = 8; break; case CPIA_COMMAND_ReadMCPorts: case CPIA_COMMAND_ReadVCRegs: datasize = 4; break; default: datasize = 0; break; } cmd[0] = command >> 8; cmd[1] = command & 0xff; cmd[2] = a; cmd[3] = b; cmd[4] = c; cmd[5] = d; cmd[6] = datasize; cmd[7] = 0; ret = cpia_usb_transferCmd(gspca_dev, cmd); if (ret) return ret; switch (command) { case CPIA_COMMAND_GetCPIAVersion: sd->params.version.firmwareVersion = gspca_dev->usb_buf[0]; sd->params.version.firmwareRevision = gspca_dev->usb_buf[1]; sd->params.version.vcVersion = gspca_dev->usb_buf[2]; sd->params.version.vcRevision = gspca_dev->usb_buf[3]; break; case CPIA_COMMAND_GetPnPID: sd->params.pnpID.vendor = gspca_dev->usb_buf[0] | (gspca_dev->usb_buf[1] << 8); sd->params.pnpID.product = gspca_dev->usb_buf[2] | (gspca_dev->usb_buf[3] << 8); sd->params.pnpID.deviceRevision = gspca_dev->usb_buf[4] | (gspca_dev->usb_buf[5] << 8); break; case CPIA_COMMAND_GetCameraStatus: sd->params.status.systemState = gspca_dev->usb_buf[0]; sd->params.status.grabState = gspca_dev->usb_buf[1]; sd->params.status.streamState = gspca_dev->usb_buf[2]; sd->params.status.fatalError = gspca_dev->usb_buf[3]; sd->params.status.cmdError = gspca_dev->usb_buf[4]; sd->params.status.debugFlags = gspca_dev->usb_buf[5]; sd->params.status.vpStatus = gspca_dev->usb_buf[6]; sd->params.status.errorCode = gspca_dev->usb_buf[7]; break; case CPIA_COMMAND_GetVPVersion: sd->params.vpVersion.vpVersion = gspca_dev->usb_buf[0]; sd->params.vpVersion.vpRevision = gspca_dev->usb_buf[1]; sd->params.vpVersion.cameraHeadID = gspca_dev->usb_buf[2] | (gspca_dev->usb_buf[3] << 8); break; case CPIA_COMMAND_GetColourParams: sd->params.colourParams.brightness = gspca_dev->usb_buf[0]; sd->params.colourParams.contrast = gspca_dev->usb_buf[1]; sd->params.colourParams.saturation = gspca_dev->usb_buf[2]; break; case CPIA_COMMAND_GetColourBalance: sd->params.colourBalance.redGain = gspca_dev->usb_buf[0]; sd->params.colourBalance.greenGain = gspca_dev->usb_buf[1]; sd->params.colourBalance.blueGain = gspca_dev->usb_buf[2]; break; case CPIA_COMMAND_GetExposure: sd->params.exposure.gain = gspca_dev->usb_buf[0]; sd->params.exposure.fineExp = gspca_dev->usb_buf[1]; sd->params.exposure.coarseExpLo = gspca_dev->usb_buf[2]; sd->params.exposure.coarseExpHi = gspca_dev->usb_buf[3]; sd->params.exposure.redComp = gspca_dev->usb_buf[4]; sd->params.exposure.green1Comp = gspca_dev->usb_buf[5]; sd->params.exposure.green2Comp = gspca_dev->usb_buf[6]; sd->params.exposure.blueComp = gspca_dev->usb_buf[7]; break; case CPIA_COMMAND_ReadMCPorts: /* test button press */ a = ((gspca_dev->usb_buf[1] & 0x02) == 0); if (a != sd->params.qx3.button) { #if IS_ENABLED(CONFIG_INPUT) input_report_key(gspca_dev->input_dev, KEY_CAMERA, a); input_sync(gspca_dev->input_dev); #endif sd->params.qx3.button = a; } if (sd->params.qx3.button) { /* button pressed - unlock the latch */ ret = do_command(gspca_dev, CPIA_COMMAND_WriteMCPort, 3, 0xdf, 0xdf, 0); if (ret) return ret; ret = do_command(gspca_dev, CPIA_COMMAND_WriteMCPort, 3, 0xff, 0xff, 0); if (ret) return ret; } /* test 
whether microscope is cradled */ sd->params.qx3.cradled = ((gspca_dev->usb_buf[2] & 0x40) == 0); break; } return 0; } /* send a command to the camera with an additional data transaction */ static int do_command_extended(struct gspca_dev *gspca_dev, u16 command, u8 a, u8 b, u8 c, u8 d, u8 e, u8 f, u8 g, u8 h, u8 i, u8 j, u8 k, u8 l) { u8 cmd[8]; cmd[0] = command >> 8; cmd[1] = command & 0xff; cmd[2] = a; cmd[3] = b; cmd[4] = c; cmd[5] = d; cmd[6] = 8; cmd[7] = 0; gspca_dev->usb_buf[0] = e; gspca_dev->usb_buf[1] = f; gspca_dev->usb_buf[2] = g; gspca_dev->usb_buf[3] = h; gspca_dev->usb_buf[4] = i; gspca_dev->usb_buf[5] = j; gspca_dev->usb_buf[6] = k; gspca_dev->usb_buf[7] = l; return cpia_usb_transferCmd(gspca_dev, cmd); } /* find_over_exposure * Finds a suitable value of OverExposure for use with SetFlickerCtrl * Some calculation is required because this value changes with the brightness * set with SetColourParameters * * Parameters: Brightness - last brightness value set with SetColourParameters * * Returns: OverExposure value to use with SetFlickerCtrl */ #define FLICKER_MAX_EXPOSURE 250 #define FLICKER_ALLOWABLE_OVER_EXPOSURE 146 #define FLICKER_BRIGHTNESS_CONSTANT 59 static int find_over_exposure(int brightness) { int MaxAllowableOverExposure, OverExposure; MaxAllowableOverExposure = FLICKER_MAX_EXPOSURE - brightness - FLICKER_BRIGHTNESS_CONSTANT; OverExposure = min(MaxAllowableOverExposure, FLICKER_ALLOWABLE_OVER_EXPOSURE); return OverExposure; } #undef FLICKER_MAX_EXPOSURE #undef FLICKER_ALLOWABLE_OVER_EXPOSURE #undef FLICKER_BRIGHTNESS_CONSTANT /* initialise cam_data structure */ static void reset_camera_params(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct cam_params *params = &sd->params; /* The following parameter values are the defaults from * "Software Developer's Guide for CPiA Cameras". Any changes * to the defaults are noted in comments. */ params->colourParams.brightness = BRIGHTNESS_DEF; params->colourParams.contrast = CONTRAST_DEF; params->colourParams.saturation = SATURATION_DEF; params->exposure.gainMode = 4; params->exposure.expMode = 2; /* AEC */ params->exposure.compMode = 1; params->exposure.centreWeight = 1; params->exposure.gain = 0; params->exposure.fineExp = 0; params->exposure.coarseExpLo = 185; params->exposure.coarseExpHi = 0; params->exposure.redComp = COMP_RED; params->exposure.green1Comp = COMP_GREEN1; params->exposure.green2Comp = COMP_GREEN2; params->exposure.blueComp = COMP_BLUE; params->colourBalance.balanceMode = 2; /* ACB */ params->colourBalance.redGain = 32; params->colourBalance.greenGain = 6; params->colourBalance.blueGain = 92; params->apcor.gain1 = 0x18; params->apcor.gain2 = 0x16; params->apcor.gain4 = 0x24; params->apcor.gain8 = 0x34; params->vlOffset.gain1 = 20; params->vlOffset.gain2 = 24; params->vlOffset.gain4 = 26; params->vlOffset.gain8 = 26; params->compressionParams.hysteresis = 3; params->compressionParams.threshMax = 11; params->compressionParams.smallStep = 1; params->compressionParams.largeStep = 3; params->compressionParams.decimationHysteresis = 2; params->compressionParams.frDiffStepThresh = 5; params->compressionParams.qDiffStepThresh = 3; params->compressionParams.decimationThreshMod = 2; /* End of default values from Software Developer's Guide */ /* Set Sensor FPS to 15fps. This seems better than 30fps * for indoor lighting. 
*/ params->sensorFps.divisor = 1; params->sensorFps.baserate = 1; params->flickerControl.flickerMode = 0; params->flickerControl.disabled = 1; params->flickerControl.coarseJump = flicker_jumps[sd->mainsFreq] [params->sensorFps.baserate] [params->sensorFps.divisor]; params->flickerControl.allowableOverExposure = find_over_exposure(params->colourParams.brightness); params->yuvThreshold.yThreshold = 6; /* From windows driver */ params->yuvThreshold.uvThreshold = 6; /* From windows driver */ params->format.subSample = SUBSAMPLE_420; params->format.yuvOrder = YUVORDER_YUYV; params->compression.mode = CPIA_COMPRESSION_AUTO; params->compression.decimation = NO_DECIMATION; params->compressionTarget.frTargeting = COMP_TARGET_DEF; params->compressionTarget.targetFR = 15; /* From windows driver */ params->compressionTarget.targetQ = 5; /* From windows driver */ params->qx3.qx3_detected = 0; params->qx3.toplight = 0; params->qx3.bottomlight = 0; params->qx3.button = 0; params->qx3.cradled = 0; } static void printstatus(struct gspca_dev *gspca_dev, struct cam_params *params) { gspca_dbg(gspca_dev, D_PROBE, "status: %02x %02x %02x %02x %02x %02x %02x %02x\n", params->status.systemState, params->status.grabState, params->status.streamState, params->status.fatalError, params->status.cmdError, params->status.debugFlags, params->status.vpStatus, params->status.errorCode); } static int goto_low_power(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; ret = do_command(gspca_dev, CPIA_COMMAND_GotoLoPower, 0, 0, 0, 0); if (ret) return ret; ret = do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0); if (ret) return ret; if (sd->params.status.systemState != LO_POWER_STATE) { if (sd->params.status.systemState != WARM_BOOT_STATE) { gspca_err(gspca_dev, "unexpected state after lo power cmd: %02x\n", sd->params.status.systemState); printstatus(gspca_dev, &sd->params); } return -EIO; } gspca_dbg(gspca_dev, D_CONF, "camera now in LOW power state\n"); return 0; } static int goto_high_power(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; ret = do_command(gspca_dev, CPIA_COMMAND_GotoHiPower, 0, 0, 0, 0); if (ret) return ret; msleep_interruptible(40); /* windows driver does it too */ if (signal_pending(current)) return -EINTR; ret = do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0); if (ret) return ret; if (sd->params.status.systemState != HI_POWER_STATE) { gspca_err(gspca_dev, "unexpected state after hi power cmd: %02x\n", sd->params.status.systemState); printstatus(gspca_dev, &sd->params); return -EIO; } gspca_dbg(gspca_dev, D_CONF, "camera now in HIGH power state\n"); return 0; } static int get_version_information(struct gspca_dev *gspca_dev) { int ret; /* GetCPIAVersion */ ret = do_command(gspca_dev, CPIA_COMMAND_GetCPIAVersion, 0, 0, 0, 0); if (ret) return ret; /* GetPnPID */ return do_command(gspca_dev, CPIA_COMMAND_GetPnPID, 0, 0, 0, 0); } static int save_camera_state(struct gspca_dev *gspca_dev) { int ret; ret = do_command(gspca_dev, CPIA_COMMAND_GetColourBalance, 0, 0, 0, 0); if (ret) return ret; return do_command(gspca_dev, CPIA_COMMAND_GetExposure, 0, 0, 0, 0); } static int command_setformat(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; ret = do_command(gspca_dev, CPIA_COMMAND_SetFormat, sd->params.format.videoSize, sd->params.format.subSample, sd->params.format.yuvOrder, 0); if (ret) return ret; return do_command(gspca_dev, CPIA_COMMAND_SetROI, sd->params.roi.colStart, 
sd->params.roi.colEnd, sd->params.roi.rowStart, sd->params.roi.rowEnd); } static int command_setcolourparams(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetColourParams, sd->params.colourParams.brightness, sd->params.colourParams.contrast, sd->params.colourParams.saturation, 0); } static int command_setapcor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetApcor, sd->params.apcor.gain1, sd->params.apcor.gain2, sd->params.apcor.gain4, sd->params.apcor.gain8); } static int command_setvloffset(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetVLOffset, sd->params.vlOffset.gain1, sd->params.vlOffset.gain2, sd->params.vlOffset.gain4, sd->params.vlOffset.gain8); } static int command_setexposure(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; ret = do_command_extended(gspca_dev, CPIA_COMMAND_SetExposure, sd->params.exposure.gainMode, 1, sd->params.exposure.compMode, sd->params.exposure.centreWeight, sd->params.exposure.gain, sd->params.exposure.fineExp, sd->params.exposure.coarseExpLo, sd->params.exposure.coarseExpHi, sd->params.exposure.redComp, sd->params.exposure.green1Comp, sd->params.exposure.green2Comp, sd->params.exposure.blueComp); if (ret) return ret; if (sd->params.exposure.expMode != 1) { ret = do_command_extended(gspca_dev, CPIA_COMMAND_SetExposure, 0, sd->params.exposure.expMode, 0, 0, sd->params.exposure.gain, sd->params.exposure.fineExp, sd->params.exposure.coarseExpLo, sd->params.exposure.coarseExpHi, 0, 0, 0, 0); } return ret; } static int command_setcolourbalance(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->params.colourBalance.balanceMode == 1) { int ret; ret = do_command(gspca_dev, CPIA_COMMAND_SetColourBalance, 1, sd->params.colourBalance.redGain, sd->params.colourBalance.greenGain, sd->params.colourBalance.blueGain); if (ret) return ret; return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance, 3, 0, 0, 0); } if (sd->params.colourBalance.balanceMode == 2) { return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance, 2, 0, 0, 0); } if (sd->params.colourBalance.balanceMode == 3) { return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance, 3, 0, 0, 0); } return -EINVAL; } static int command_setcompressiontarget(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetCompressionTarget, sd->params.compressionTarget.frTargeting, sd->params.compressionTarget.targetFR, sd->params.compressionTarget.targetQ, 0); } static int command_setyuvtresh(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetYUVThresh, sd->params.yuvThreshold.yThreshold, sd->params.yuvThreshold.uvThreshold, 0, 0); } static int command_setcompressionparams(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command_extended(gspca_dev, CPIA_COMMAND_SetCompressionParams, 0, 0, 0, 0, sd->params.compressionParams.hysteresis, sd->params.compressionParams.threshMax, sd->params.compressionParams.smallStep, sd->params.compressionParams.largeStep, sd->params.compressionParams.decimationHysteresis, sd->params.compressionParams.frDiffStepThresh, sd->params.compressionParams.qDiffStepThresh, sd->params.compressionParams.decimationThreshMod); } static int command_setcompression(struct gspca_dev 
*gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetCompression, sd->params.compression.mode, sd->params.compression.decimation, 0, 0); } static int command_setsensorfps(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetSensorFPS, sd->params.sensorFps.divisor, sd->params.sensorFps.baserate, 0, 0); } static int command_setflickerctrl(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetFlickerCtrl, sd->params.flickerControl.flickerMode, sd->params.flickerControl.coarseJump, sd->params.flickerControl.allowableOverExposure, 0); } static int command_setecptiming(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_SetECPTiming, sd->params.ecpTiming, 0, 0, 0); } static int command_pause(struct gspca_dev *gspca_dev) { return do_command(gspca_dev, CPIA_COMMAND_EndStreamCap, 0, 0, 0, 0); } static int command_resume(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; return do_command(gspca_dev, CPIA_COMMAND_InitStreamCap, 0, sd->params.streamStartLine, 0, 0); } static int command_setlights(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret, p1, p2; p1 = (sd->params.qx3.bottomlight == 0) << 1; p2 = (sd->params.qx3.toplight == 0) << 3; ret = do_command(gspca_dev, CPIA_COMMAND_WriteVCReg, 0x90, 0x8f, 0x50, 0); if (ret) return ret; return do_command(gspca_dev, CPIA_COMMAND_WriteMCPort, 2, 0, p1 | p2 | 0xe0, 0); } static int set_flicker(struct gspca_dev *gspca_dev, int on, int apply) { /* Everything in here is from the Windows driver */ /* define for compgain calculation */ #if 0 #define COMPGAIN(base, curexp, newexp) \ (u8) ((((float) base - 128.0) * ((float) curexp / (float) newexp)) + 128.5) #define EXP_FROM_COMP(basecomp, curcomp, curexp) \ (u16)((float)curexp * (float)(u8)(curcomp + 128) / \ (float)(u8)(basecomp - 128)) #else /* equivalent functions without floating point math */ #define COMPGAIN(base, curexp, newexp) \ (u8)(128 + (((u32)(2*(base-128)*curexp + newexp)) / (2 * newexp))) #define EXP_FROM_COMP(basecomp, curcomp, curexp) \ (u16)(((u32)(curexp * (u8)(curcomp + 128)) / (u8)(basecomp - 128))) #endif struct sd *sd = (struct sd *) gspca_dev; int currentexp = sd->params.exposure.coarseExpLo + sd->params.exposure.coarseExpHi * 256; int ret, startexp; if (on) { int cj = sd->params.flickerControl.coarseJump; sd->params.flickerControl.flickerMode = 1; sd->params.flickerControl.disabled = 0; if (sd->params.exposure.expMode != 2) { sd->params.exposure.expMode = 2; sd->exposure_status = EXPOSURE_NORMAL; } if (sd->params.exposure.gain >= BITS_PER_TYPE(currentexp)) return -EINVAL; currentexp = currentexp << sd->params.exposure.gain; sd->params.exposure.gain = 0; /* round down current exposure to nearest value */ startexp = (currentexp + ROUND_UP_EXP_FOR_FLICKER) / cj; if (startexp < 1) startexp = 1; startexp = (startexp * cj) - 1; if (FIRMWARE_VERSION(1, 2)) while (startexp > MAX_EXP_102) startexp -= cj; else while (startexp > MAX_EXP) startexp -= cj; sd->params.exposure.coarseExpLo = startexp & 0xff; sd->params.exposure.coarseExpHi = startexp >> 8; if (currentexp > startexp) { if (currentexp > (2 * startexp)) currentexp = 2 * startexp; sd->params.exposure.redComp = COMPGAIN(COMP_RED, currentexp, startexp); sd->params.exposure.green1Comp = COMPGAIN(COMP_GREEN1, currentexp, startexp); sd->params.exposure.green2Comp = 
COMPGAIN(COMP_GREEN2, currentexp, startexp); sd->params.exposure.blueComp = COMPGAIN(COMP_BLUE, currentexp, startexp); } else { sd->params.exposure.redComp = COMP_RED; sd->params.exposure.green1Comp = COMP_GREEN1; sd->params.exposure.green2Comp = COMP_GREEN2; sd->params.exposure.blueComp = COMP_BLUE; } if (FIRMWARE_VERSION(1, 2)) sd->params.exposure.compMode = 0; else sd->params.exposure.compMode = 1; sd->params.apcor.gain1 = 0x18; sd->params.apcor.gain2 = 0x18; sd->params.apcor.gain4 = 0x16; sd->params.apcor.gain8 = 0x14; } else { sd->params.flickerControl.flickerMode = 0; sd->params.flickerControl.disabled = 1; /* Average equivalent coarse for each comp channel */ startexp = EXP_FROM_COMP(COMP_RED, sd->params.exposure.redComp, currentexp); startexp += EXP_FROM_COMP(COMP_GREEN1, sd->params.exposure.green1Comp, currentexp); startexp += EXP_FROM_COMP(COMP_GREEN2, sd->params.exposure.green2Comp, currentexp); startexp += EXP_FROM_COMP(COMP_BLUE, sd->params.exposure.blueComp, currentexp); startexp = startexp >> 2; while (startexp > MAX_EXP && sd->params.exposure.gain < sd->params.exposure.gainMode - 1) { startexp = startexp >> 1; ++sd->params.exposure.gain; } if (FIRMWARE_VERSION(1, 2) && startexp > MAX_EXP_102) startexp = MAX_EXP_102; if (startexp > MAX_EXP) startexp = MAX_EXP; sd->params.exposure.coarseExpLo = startexp & 0xff; sd->params.exposure.coarseExpHi = startexp >> 8; sd->params.exposure.redComp = COMP_RED; sd->params.exposure.green1Comp = COMP_GREEN1; sd->params.exposure.green2Comp = COMP_GREEN2; sd->params.exposure.blueComp = COMP_BLUE; sd->params.exposure.compMode = 1; sd->params.apcor.gain1 = 0x18; sd->params.apcor.gain2 = 0x16; sd->params.apcor.gain4 = 0x24; sd->params.apcor.gain8 = 0x34; } sd->params.vlOffset.gain1 = 20; sd->params.vlOffset.gain2 = 24; sd->params.vlOffset.gain4 = 26; sd->params.vlOffset.gain8 = 26; if (apply) { ret = command_setexposure(gspca_dev); if (ret) return ret; ret = command_setapcor(gspca_dev); if (ret) return ret; ret = command_setvloffset(gspca_dev); if (ret) return ret; ret = command_setflickerctrl(gspca_dev); if (ret) return ret; } return 0; #undef EXP_FROM_COMP #undef COMPGAIN } /* monitor the exposure and adjust the sensor frame rate if needed */ static void monitor_exposure(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; u8 exp_acc, bcomp, cmd[8]; int ret, light_exp, dark_exp, very_dark_exp; int old_exposure, new_exposure, framerate; int setfps = 0, setexp = 0, setflicker = 0; /* get necessary stats and register settings from camera */ /* do_command can't handle this, so do it ourselves */ cmd[0] = CPIA_COMMAND_ReadVPRegs >> 8; cmd[1] = CPIA_COMMAND_ReadVPRegs & 0xff; cmd[2] = 30; cmd[3] = 4; cmd[4] = 9; cmd[5] = 8; cmd[6] = 8; cmd[7] = 0; ret = cpia_usb_transferCmd(gspca_dev, cmd); if (ret) { pr_err("ReadVPRegs(30,4,9,8) - failed: %d\n", ret); return; } exp_acc = gspca_dev->usb_buf[0]; bcomp = gspca_dev->usb_buf[1]; light_exp = sd->params.colourParams.brightness + TC - 50 + EXP_ACC_LIGHT; if (light_exp > 255) light_exp = 255; dark_exp = sd->params.colourParams.brightness + TC - 50 - EXP_ACC_DARK; if (dark_exp < 0) dark_exp = 0; very_dark_exp = dark_exp / 2; old_exposure = sd->params.exposure.coarseExpHi * 256 + sd->params.exposure.coarseExpLo; if (!sd->params.flickerControl.disabled) { /* Flicker control on */ int max_comp = FIRMWARE_VERSION(1, 2) ? 
MAX_COMP : HIGH_COMP_102; bcomp += 128; /* decode */ if (bcomp >= max_comp && exp_acc < dark_exp) { /* dark */ if (exp_acc < very_dark_exp) { /* very dark */ if (sd->exposure_status == EXPOSURE_VERY_DARK) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_VERY_DARK; sd->exposure_count = 1; } } else { /* just dark */ if (sd->exposure_status == EXPOSURE_DARK) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_DARK; sd->exposure_count = 1; } } } else if (old_exposure <= LOW_EXP || exp_acc > light_exp) { /* light */ if (old_exposure <= VERY_LOW_EXP) { /* very light */ if (sd->exposure_status == EXPOSURE_VERY_LIGHT) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_VERY_LIGHT; sd->exposure_count = 1; } } else { /* just light */ if (sd->exposure_status == EXPOSURE_LIGHT) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_LIGHT; sd->exposure_count = 1; } } } else { /* not dark or light */ sd->exposure_status = EXPOSURE_NORMAL; } } else { /* Flicker control off */ if (old_exposure >= MAX_EXP && exp_acc < dark_exp) { /* dark */ if (exp_acc < very_dark_exp) { /* very dark */ if (sd->exposure_status == EXPOSURE_VERY_DARK) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_VERY_DARK; sd->exposure_count = 1; } } else { /* just dark */ if (sd->exposure_status == EXPOSURE_DARK) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_DARK; sd->exposure_count = 1; } } } else if (old_exposure <= LOW_EXP || exp_acc > light_exp) { /* light */ if (old_exposure <= VERY_LOW_EXP) { /* very light */ if (sd->exposure_status == EXPOSURE_VERY_LIGHT) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_VERY_LIGHT; sd->exposure_count = 1; } } else { /* just light */ if (sd->exposure_status == EXPOSURE_LIGHT) ++sd->exposure_count; else { sd->exposure_status = EXPOSURE_LIGHT; sd->exposure_count = 1; } } } else { /* not dark or light */ sd->exposure_status = EXPOSURE_NORMAL; } } framerate = atomic_read(&sd->fps); if (framerate > 30 || framerate < 1) framerate = 1; if (!sd->params.flickerControl.disabled) { /* Flicker control on */ if ((sd->exposure_status == EXPOSURE_VERY_DARK || sd->exposure_status == EXPOSURE_DARK) && sd->exposure_count >= DARK_TIME * framerate && sd->params.sensorFps.divisor < 2) { /* dark for too long */ ++sd->params.sensorFps.divisor; setfps = 1; sd->params.flickerControl.coarseJump = flicker_jumps[sd->mainsFreq] [sd->params.sensorFps.baserate] [sd->params.sensorFps.divisor]; setflicker = 1; new_exposure = sd->params.flickerControl.coarseJump-1; while (new_exposure < old_exposure / 2) new_exposure += sd->params.flickerControl.coarseJump; sd->params.exposure.coarseExpLo = new_exposure & 0xff; sd->params.exposure.coarseExpHi = new_exposure >> 8; setexp = 1; sd->exposure_status = EXPOSURE_NORMAL; gspca_dbg(gspca_dev, D_CONF, "Automatically decreasing sensor_fps\n"); } else if ((sd->exposure_status == EXPOSURE_VERY_LIGHT || sd->exposure_status == EXPOSURE_LIGHT) && sd->exposure_count >= LIGHT_TIME * framerate && sd->params.sensorFps.divisor > 0) { /* light for too long */ int max_exp = FIRMWARE_VERSION(1, 2) ? 
MAX_EXP_102 : MAX_EXP; --sd->params.sensorFps.divisor; setfps = 1; sd->params.flickerControl.coarseJump = flicker_jumps[sd->mainsFreq] [sd->params.sensorFps.baserate] [sd->params.sensorFps.divisor]; setflicker = 1; new_exposure = sd->params.flickerControl.coarseJump-1; while (new_exposure < 2 * old_exposure && new_exposure + sd->params.flickerControl.coarseJump < max_exp) new_exposure += sd->params.flickerControl.coarseJump; sd->params.exposure.coarseExpLo = new_exposure & 0xff; sd->params.exposure.coarseExpHi = new_exposure >> 8; setexp = 1; sd->exposure_status = EXPOSURE_NORMAL; gspca_dbg(gspca_dev, D_CONF, "Automatically increasing sensor_fps\n"); } } else { /* Flicker control off */ if ((sd->exposure_status == EXPOSURE_VERY_DARK || sd->exposure_status == EXPOSURE_DARK) && sd->exposure_count >= DARK_TIME * framerate && sd->params.sensorFps.divisor < 2) { /* dark for too long */ ++sd->params.sensorFps.divisor; setfps = 1; if (sd->params.exposure.gain > 0) { --sd->params.exposure.gain; setexp = 1; } sd->exposure_status = EXPOSURE_NORMAL; gspca_dbg(gspca_dev, D_CONF, "Automatically decreasing sensor_fps\n"); } else if ((sd->exposure_status == EXPOSURE_VERY_LIGHT || sd->exposure_status == EXPOSURE_LIGHT) && sd->exposure_count >= LIGHT_TIME * framerate && sd->params.sensorFps.divisor > 0) { /* light for too long */ --sd->params.sensorFps.divisor; setfps = 1; if (sd->params.exposure.gain < sd->params.exposure.gainMode - 1) { ++sd->params.exposure.gain; setexp = 1; } sd->exposure_status = EXPOSURE_NORMAL; gspca_dbg(gspca_dev, D_CONF, "Automatically increasing sensor_fps\n"); } } if (setexp) command_setexposure(gspca_dev); if (setfps) command_setsensorfps(gspca_dev); if (setflicker) command_setflickerctrl(gspca_dev); } /*-----------------------------------------------------------------*/ /* if flicker is switched off, this function switches it back on. It checks, however, that conditions are suitable before restarting it. This should only be called for firmware version 1.2. It also adjusts the colour balance when an exposure step is detected - as long as flicker is running */ static void restart_flicker(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int cam_exposure, old_exp; if (!FIRMWARE_VERSION(1, 2)) return; cam_exposure = atomic_read(&sd->cam_exposure); if (sd->params.flickerControl.flickerMode == 0 || cam_exposure == 0) return; old_exp = sd->params.exposure.coarseExpLo + sd->params.exposure.coarseExpHi*256; /* see how far away camera exposure is from a valid flicker exposure value */ cam_exposure %= sd->params.flickerControl.coarseJump; if (!sd->params.flickerControl.disabled && cam_exposure <= sd->params.flickerControl.coarseJump - 3) { /* Flicker control auto-disabled */ sd->params.flickerControl.disabled = 1; } if (sd->params.flickerControl.disabled && old_exp > sd->params.flickerControl.coarseJump + ROUND_UP_EXP_FOR_FLICKER) { /* exposure is now high enough to switch flicker control back on */ set_flicker(gspca_dev, 1, 1); } } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; sd->mainsFreq = FREQ_DEF == V4L2_CID_POWER_LINE_FREQUENCY_60HZ; reset_camera_params(gspca_dev); gspca_dbg(gspca_dev, D_PROBE, "cpia CPiA camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); cam = &gspca_dev->cam; cam->cam_mode = mode; cam->nmodes = ARRAY_SIZE(mode); goto_low_power(gspca_dev); /* Check the firmware version. 
*/ sd->params.version.firmwareVersion = 0; get_version_information(gspca_dev); if (sd->params.version.firmwareVersion != 1) { gspca_err(gspca_dev, "only firmware version 1 is supported (got: %d)\n", sd->params.version.firmwareVersion); return -ENODEV; } /* A bug in firmware 1-02 limits gainMode to 2 */ if (sd->params.version.firmwareRevision <= 2 && sd->params.exposure.gainMode > 2) { sd->params.exposure.gainMode = 2; } /* set QX3 detected flag */ sd->params.qx3.qx3_detected = (sd->params.pnpID.vendor == 0x0813 && sd->params.pnpID.product == 0x0001); return 0; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int priv, ret; /* Start the camera in low power mode */ if (goto_low_power(gspca_dev)) { if (sd->params.status.systemState != WARM_BOOT_STATE) { gspca_err(gspca_dev, "unexpected systemstate: %02x\n", sd->params.status.systemState); printstatus(gspca_dev, &sd->params); return -ENODEV; } /* FIXME: this is just dirty trial and error */ ret = goto_high_power(gspca_dev); if (ret) return ret; ret = do_command(gspca_dev, CPIA_COMMAND_DiscardFrame, 0, 0, 0, 0); if (ret) return ret; ret = goto_low_power(gspca_dev); if (ret) return ret; } /* procedure described in developer's guide p3-28 */ /* Check the firmware version. */ sd->params.version.firmwareVersion = 0; get_version_information(gspca_dev); /* The fatal error checking should be done after * the camera powers up (developer's guide p 3-38) */ /* Set streamState before transition to high power to avoid bug * in firmware 1-02 */ ret = do_command(gspca_dev, CPIA_COMMAND_ModifyCameraStatus, STREAMSTATE, 0, STREAM_NOT_READY, 0); if (ret) return ret; /* GotoHiPower */ ret = goto_high_power(gspca_dev); if (ret) return ret; /* Check the camera status */ ret = do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0); if (ret) return ret; if (sd->params.status.fatalError) { gspca_err(gspca_dev, "fatal_error: %04x, vp_status: %04x\n", sd->params.status.fatalError, sd->params.status.vpStatus); return -EIO; } /* VPVersion can't be retrieved before the camera is in HiPower, * so get it here instead of in get_version_information. 
*/ ret = do_command(gspca_dev, CPIA_COMMAND_GetVPVersion, 0, 0, 0, 0); if (ret) return ret; /* Determine video mode settings */ sd->params.streamStartLine = 120; priv = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; if (priv & 0x01) { /* crop */ sd->params.roi.colStart = 2; sd->params.roi.rowStart = 6; } else { sd->params.roi.colStart = 0; sd->params.roi.rowStart = 0; } if (priv & 0x02) { /* quarter */ sd->params.format.videoSize = VIDEOSIZE_QCIF; sd->params.roi.colStart /= 2; sd->params.roi.rowStart /= 2; sd->params.streamStartLine /= 2; } else sd->params.format.videoSize = VIDEOSIZE_CIF; sd->params.roi.colEnd = sd->params.roi.colStart + (gspca_dev->pixfmt.width >> 3); sd->params.roi.rowEnd = sd->params.roi.rowStart + (gspca_dev->pixfmt.height >> 2); /* And now set the camera to a known state */ ret = do_command(gspca_dev, CPIA_COMMAND_SetGrabMode, CPIA_GRAB_CONTINEOUS, 0, 0, 0); if (ret) return ret; /* We start with compression disabled, as we need one uncompressed frame to handle later compressed frames */ ret = do_command(gspca_dev, CPIA_COMMAND_SetCompression, CPIA_COMPRESSION_NONE, NO_DECIMATION, 0, 0); if (ret) return ret; ret = command_setcompressiontarget(gspca_dev); if (ret) return ret; ret = command_setcolourparams(gspca_dev); if (ret) return ret; ret = command_setformat(gspca_dev); if (ret) return ret; ret = command_setyuvtresh(gspca_dev); if (ret) return ret; ret = command_setecptiming(gspca_dev); if (ret) return ret; ret = command_setcompressionparams(gspca_dev); if (ret) return ret; ret = command_setexposure(gspca_dev); if (ret) return ret; ret = command_setcolourbalance(gspca_dev); if (ret) return ret; ret = command_setsensorfps(gspca_dev); if (ret) return ret; ret = command_setapcor(gspca_dev); if (ret) return ret; ret = command_setflickerctrl(gspca_dev); if (ret) return ret; ret = command_setvloffset(gspca_dev); if (ret) return ret; /* Start stream */ ret = command_resume(gspca_dev); if (ret) return ret; /* Wait 6 frames before turning compression on for the sensor to get all settings and AEC/ACB to settle */ sd->first_frame = 6; sd->exposure_status = EXPOSURE_NORMAL; sd->exposure_count = 0; atomic_set(&sd->cam_exposure, 0); atomic_set(&sd->fps, 0); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd __maybe_unused = (struct sd *) gspca_dev; command_pause(gspca_dev); /* save camera state for later open (developers guide ch 3.5.3) */ save_camera_state(gspca_dev); /* GotoLoPower */ goto_low_power(gspca_dev); /* Update the camera status */ do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0); #if IS_ENABLED(CONFIG_INPUT) /* If the last button state is pressed, release it now! */ if (sd->params.qx3.button) { /* The camera latch will hold the pressed state until we reset the latch, so we do not reset sd->params.qx3.button now, to avoid a false keypress being reported the next sd_start */ input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); } #endif } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret; /* Start / Stop the camera to make sure we are talking to a supported camera, and to get some information from it to print. 
*/ ret = sd_start(gspca_dev); if (ret) return ret; /* Ensure the QX3 illuminators' states are restored upon resume, or disable the illuminator controls, if this isn't a QX3 */ if (sd->params.qx3.qx3_detected) command_setlights(gspca_dev); sd_stopN(gspca_dev); gspca_dbg(gspca_dev, D_PROBE, "CPIA Version: %d.%02d (%d.%d)\n", sd->params.version.firmwareVersion, sd->params.version.firmwareRevision, sd->params.version.vcVersion, sd->params.version.vcRevision); gspca_dbg(gspca_dev, D_PROBE, "CPIA PnP-ID: %04x:%04x:%04x", sd->params.pnpID.vendor, sd->params.pnpID.product, sd->params.pnpID.deviceRevision); gspca_dbg(gspca_dev, D_PROBE, "VP-Version: %d.%d %04x", sd->params.vpVersion.vpVersion, sd->params.vpVersion.vpRevision, sd->params.vpVersion.cameraHeadID); return 0; } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, int len) { struct sd *sd = (struct sd *) gspca_dev; /* Check for SOF */ if (len >= 64 && data[0] == MAGIC_0 && data[1] == MAGIC_1 && data[16] == sd->params.format.videoSize && data[17] == sd->params.format.subSample && data[18] == sd->params.format.yuvOrder && data[24] == sd->params.roi.colStart && data[25] == sd->params.roi.colEnd && data[26] == sd->params.roi.rowStart && data[27] == sd->params.roi.rowEnd) { u8 *image; atomic_set(&sd->cam_exposure, data[39] * 2); atomic_set(&sd->fps, data[41]); /* Check for proper EOF for last frame */ image = gspca_dev->image; if (image != NULL && gspca_dev->image_len > 4 && image[gspca_dev->image_len - 4] == 0xff && image[gspca_dev->image_len - 3] == 0xff && image[gspca_dev->image_len - 2] == 0xff && image[gspca_dev->image_len - 1] == 0xff) gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); return; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void sd_dq_callback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* Set the normal compression settings once we have captured a few uncompressed frames (and AEC has hopefully settled) */ if (sd->first_frame) { sd->first_frame--; if (sd->first_frame == 0) command_setcompression(gspca_dev); } /* Switch flicker control back on if it got turned off */ restart_flicker(gspca_dev); /* If AEC is enabled, monitor the exposure and adjust the sensor frame rate if needed */ if (sd->params.exposure.expMode == 2) monitor_exposure(gspca_dev); /* Update our knowledge of the camera state */ do_command(gspca_dev, CPIA_COMMAND_GetExposure, 0, 0, 0, 0); do_command(gspca_dev, CPIA_COMMAND_ReadMCPorts, 0, 0, 0, 0); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming && ctrl->id != V4L2_CID_POWER_LINE_FREQUENCY) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: sd->params.colourParams.brightness = ctrl->val; sd->params.flickerControl.allowableOverExposure = find_over_exposure(sd->params.colourParams.brightness); gspca_dev->usb_err = command_setcolourparams(gspca_dev); if (!gspca_dev->usb_err) gspca_dev->usb_err = command_setflickerctrl(gspca_dev); break; case V4L2_CID_CONTRAST: sd->params.colourParams.contrast = ctrl->val; gspca_dev->usb_err = command_setcolourparams(gspca_dev); break; case V4L2_CID_SATURATION: sd->params.colourParams.saturation = ctrl->val; gspca_dev->usb_err = command_setcolourparams(gspca_dev); break; case V4L2_CID_POWER_LINE_FREQUENCY: sd->mainsFreq = ctrl->val == V4L2_CID_POWER_LINE_FREQUENCY_60HZ; 
sd->params.flickerControl.coarseJump = flicker_jumps[sd->mainsFreq] [sd->params.sensorFps.baserate] [sd->params.sensorFps.divisor]; gspca_dev->usb_err = set_flicker(gspca_dev, ctrl->val != V4L2_CID_POWER_LINE_FREQUENCY_DISABLED, gspca_dev->streaming); break; case V4L2_CID_ILLUMINATORS_1: sd->params.qx3.bottomlight = ctrl->val; gspca_dev->usb_err = command_setlights(gspca_dev); break; case V4L2_CID_ILLUMINATORS_2: sd->params.qx3.toplight = ctrl->val; gspca_dev->usb_err = command_setlights(gspca_dev); break; case CPIA1_CID_COMP_TARGET: sd->params.compressionTarget.frTargeting = ctrl->val; gspca_dev->usb_err = command_setcompressiontarget(gspca_dev); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; static const char * const comp_target_menu[] = { "Quality", "Framerate", NULL }; static const struct v4l2_ctrl_config comp_target = { .ops = &sd_ctrl_ops, .id = CPIA1_CID_COMP_TARGET, .type = V4L2_CTRL_TYPE_MENU, .name = "Compression Target", .qmenu = comp_target_menu, .max = 1, .def = COMP_TARGET_DEF, }; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 7); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 100, 1, BRIGHTNESS_DEF); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 96, 8, CONTRAST_DEF); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 100, 1, SATURATION_DEF); sd->freq = v4l2_ctrl_new_std_menu(hdl, &sd_ctrl_ops, V4L2_CID_POWER_LINE_FREQUENCY, V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0, FREQ_DEF); if (sd->params.qx3.qx3_detected) { v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_ILLUMINATORS_1, 0, 1, 1, ILLUMINATORS_1_DEF); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_ILLUMINATORS_2, 0, 1, 1, ILLUMINATORS_2_DEF); } v4l2_ctrl_new_custom(hdl, &comp_target, NULL); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .dq_callback = sd_dq_callback, .pkt_scan = sd_pkt_scan, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0553, 0x0002)}, {USB_DEVICE(0x0813, 0x0001)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
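A note on the command encoding used throughout this driver: each CPIA_COMMAND_* constant is a 16-bit word whose high byte is the USB transfer direction (DATA_IN for reads, DATA_OUT for writes) and whose low byte packs a module number with an opcode; do_command() splits the word back into cmd[0] and cmd[1] before handing it to cpia_usb_transferCmd(). A minimal standalone sketch of one encoding (hypothetical test code, not part of the driver):

/* Standalone sketch: how CPIA_COMMAND_GetCPIAVersion decomposes. */
#include <assert.h>
#include <stdint.h>

#define DATA_IN 0xc0			/* device-to-host, as in the driver */
#define INPUT (DATA_IN << 8)
#define CPIA_MODULE_CPIA (0 << 5)
#define CPIA_COMMAND_GetCPIAVersion (INPUT | CPIA_MODULE_CPIA | 1)

int main(void)
{
	uint16_t command = CPIA_COMMAND_GetCPIAVersion;
	uint8_t dir = command >> 8;	/* becomes cmd[0]: 0xc0, an IN request */
	uint8_t req = command & 0xff;	/* becomes cmd[1]: module 0, opcode 1 */

	assert(command == 0xc001);
	assert(dir == DATA_IN && req == 0x01);
	return 0;
}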
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNH_H #define __NET_RTNH_H #include <linux/rtnetlink.h> #include <net/netlink.h> static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) { return remaining >= (int)sizeof(*rtnh) && rtnh->rtnh_len >= sizeof(*rtnh) && rtnh->rtnh_len <= remaining; } static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, int *remaining) { int totlen = NLA_ALIGN(rtnh->rtnh_len); *remaining -= totlen; return (struct rtnexthop *) ((char *) rtnh + totlen); } static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) { return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); } static inline int rtnh_attrlen(const struct rtnexthop *rtnh) { return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); } #endif
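These helpers implement the bounds-checked walk over the rtnexthop records packed inside an RTA_MULTIPATH netlink attribute: rtnh_ok() confirms the current record still fits in the remaining payload, and rtnh_next() advances past the NLA-aligned record while shrinking the byte budget. A hedged usage sketch (the function name is illustrative; the pattern mirrors the nexthop-counting loop in the IPv4 FIB code):

static int count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	/* stop as soon as a record would overrun the attribute payload */
	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}
	/* leftover bytes indicate a malformed attribute */
	return remaining > 0 ? 0 : nhs;
}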
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H

#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/mutex.h>

/*
 * Core internal functions to deal with irq descriptors
 */

struct irq_affinity_notify;
struct proc_dir_entry;
struct module;
struct irq_desc;
struct irq_domain;
struct pt_regs;

/**
 * struct irqstat - interrupt statistics
 * @cnt:	real-time interrupt count
 * @ref:	snapshot of interrupt count
 */
struct irqstat {
	unsigned int	cnt;
#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
	unsigned int	ref;
#endif
};

/**
 * struct irq_desc - interrupt descriptor
 * @irq_common_data:	per irq and chip data passed down to chip functions
 * @kstat_irqs:		irq stats per cpu
 * @handle_irq:		highlevel irq-events handler
 * @action:		the irq action chain
 * @status_use_accessors: status information
 * @core_internal_state__do_not_mess_with_it: core internal status information
 * @depth:		disable-depth, for nested irq_disable() calls
 * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
 * @tot_count:		stats field for non-percpu irqs
 * @irq_count:		stats field to detect stalled irqs
 * @last_unhandled:	aging timer for unhandled count
 * @irqs_unhandled:	stats field for spurious unhandled interrupts
 * @threads_handled:	stats field for deferred spurious detection of threaded handlers
 * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
 * @lock:		locking for SMP
 * @affinity_hint:	hint to user space for preferred irq affinity
 * @affinity_notify:	context for notification of affinity changes
 * @pending_mask:	pending rebalanced interrupts
 * @threads_oneshot:	bitfield to handle shared oneshot threads
 * @threads_active:	number of irqaction threads currently running
 * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
 * @nr_actions:		number of installed actions on this descriptor
 * @no_suspend_depth:	number of irqactions on an irq descriptor with
 *			IRQF_NO_SUSPEND set
 * @force_resume_depth:	number of irqactions on an irq descriptor with
 *			IRQF_FORCE_RESUME set
 * @rcu:		rcu head for delayed free
 * @kobj:		kobject used to represent this struct in sysfs
 * @request_mutex:	mutex to protect request/free before locking desc->lock
 * @dir:		/proc/irq/ procfs entry
 * @debugfs_file:	dentry for the debugfs file
 * @name:		flow handler name for /proc/interrupts output
 */
struct irq_desc {
	struct irq_common_data	irq_common_data;
	struct irq_data		irq_data;
	struct irqstat __percpu	*kstat_irqs;
	irq_flow_handler_t	handle_irq;
	struct irqaction	*action;	/* IRQ action list */
	unsigned int
status_use_accessors; unsigned int core_internal_state__do_not_mess_with_it; unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int tot_count; unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; raw_spinlock_t lock; struct cpumask *percpu_enabled; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif #endif unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; #ifdef CONFIG_PM_SLEEP unsigned int nr_actions; unsigned int no_suspend_depth; unsigned int cond_suspend_depth; unsigned int force_resume_depth; #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS struct dentry *debugfs_file; const char *dev_name; #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; struct kobject kobj; #endif struct mutex request_mutex; int parent_irq; struct module *owner; const char *name; #ifdef CONFIG_HARDIRQS_SW_RESEND struct hlist_node resend_node; #endif } ____cacheline_internodealigned_in_smp; #ifdef CONFIG_SPARSE_IRQ extern void irq_lock_sparse(void); extern void irq_unlock_sparse(void); #else static inline void irq_lock_sparse(void) { } static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, unsigned int cpu) { return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) { return desc->irq_data.irq; } static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; } static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc) { return desc->irq_data.chip; } static inline void *irq_desc_get_chip_data(struct irq_desc *desc) { return desc->irq_data.chip_data; } static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { return desc->irq_common_data.handler_data; } /* * Architectures call this to let the generic IRQ layer * handle an interrupt. */ static inline void generic_handle_irq_desc(struct irq_desc *desc) { desc->handle_irq(desc); } int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* * Convert a HW interrupt number to a logical one using a IRQ domain, * and handle the result interrupt number. Return -EINVAL if * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { return desc && desc->action != NULL; } /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq * @handler: Flow control handler function for this interrupt * * Sets the handler in the irq descriptor associated to @data. 
 *
 * Must be called with irq_desc locked and valid parameters.  A typical
 * call site is the irq_set_type() callback.
 */
static inline void irq_set_handler_locked(struct irq_data *data,
					  irq_flow_handler_t handler)
{
	struct irq_desc *desc = irq_data_to_desc(data);

	desc->handle_irq = handler;
}

/**
 * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region
 * @data:	Pointer to the irq_data structure for which the chip is set
 * @chip:	Pointer to the new irq chip
 * @handler:	Flow control handler function for this interrupt
 * @name:	Name of the interrupt
 *
 * Replace the irq chip at the proper hierarchy level in @data and
 * set the handler and name in the associated irq descriptor.
 *
 * Must be called with irq_desc locked and valid parameters.
 */
static inline void
irq_set_chip_handler_name_locked(struct irq_data *data,
				 const struct irq_chip *chip,
				 irq_flow_handler_t handler, const char *name)
{
	struct irq_desc *desc = irq_data_to_desc(data);

	desc->handle_irq = handler;
	desc->name = name;
	data->chip = (struct irq_chip *)chip;
}

bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);

static inline bool irq_balancing_disabled(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
}

static inline bool irq_is_percpu(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_PER_CPU);
}

static inline bool irq_is_percpu_devid(unsigned int irq)
{
	return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID);
}

void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
			     struct lock_class_key *request_class);

static inline void irq_set_lockdep_class(unsigned int irq,
					 struct lock_class_key *lock_class,
					 struct lock_class_key *request_class)
{
	if (IS_ENABLED(CONFIG_LOCKDEP))
		__irq_set_lockdep_class(irq, lock_class, request_class);
}

#endif
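A hedged sketch of the call pattern the comments above describe: a hypothetical chip driver's irq_set_type() callback swapping flow handlers from the locked region. The name foo_irq_set_type and the particular handler choice are illustrative assumptions, not part of this header.

static int foo_irq_set_type(struct irq_data *data, unsigned int type)
{
	/* ... program the hardware trigger mode for 'type' here ... */

	/* switch the flow handler while desc->lock is held */
	if (type & IRQ_TYPE_LEVEL_MASK)
		irq_set_handler_locked(data, handle_level_irq);
	else
		irq_set_handler_locked(data, handle_edge_irq);
	return 0;
}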
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BOOTMEM_INFO_H
#define __LINUX_BOOTMEM_INFO_H

#include <linux/mm.h>
#include <linux/kmemleak.h>

/*
 * Types for free bootmem stored in the low bits of page->private.
 */
enum bootmem_type {
	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1,
	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
	MIX_SECTION_INFO,
	NODE_INFO,
	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
};

#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
void __init register_page_bootmem_info_node(struct pglist_data *pgdat);

void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
				  unsigned long nr_pages);

void get_page_bootmem(unsigned long info, struct page *page,
		      enum bootmem_type type);
void put_page_bootmem(struct page *page);

static inline enum bootmem_type bootmem_type(const struct page *page)
{
	return (unsigned long)page->private & 0xf;
}

static inline unsigned long bootmem_info(const struct page *page)
{
	return (unsigned long)page->private >> 4;
}

/*
 * Any memory allocated via the memblock allocator and not via the
 * buddy will be marked reserved already in the memmap. For those
 * pages, we can call this function to free them to the buddy allocator.
 */
static inline void free_bootmem_page(struct page *page)
{
	enum bootmem_type type = bootmem_type(page);

	/*
	 * The reserve_bootmem_region sets the reserved flag on bootmem
	 * pages.
	 */
	VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);

	if (type == SECTION_INFO || type == MIX_SECTION_INFO)
		put_page_bootmem(page);
	else
		VM_BUG_ON_PAGE(1, page);
}
#else
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
{
}

static inline void register_page_bootmem_memmap(unsigned long section_nr,
						struct page *map,
						unsigned long nr_pages)
{
}

static inline void put_page_bootmem(struct page *page)
{
}

static inline enum bootmem_type bootmem_type(const struct page *page)
{
	return SECTION_INFO;
}

static inline unsigned long bootmem_info(const struct page *page)
{
	return 0;
}

static inline void get_page_bootmem(unsigned long info, struct page *page,
				    enum bootmem_type type)
{
}

static inline void free_bootmem_page(struct page *page)
{
	kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
	free_reserved_page(page);
}
#endif

#endif /* __LINUX_BOOTMEM_INFO_H */
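To make the accessor pairing concrete, a minimal sketch: as the two accessors above imply, page->private carries the bootmem type in its low 4 bits and the info word in the remaining bits. dump_bootmem_page is a hypothetical helper, not part of this header.

static inline void dump_bootmem_page(const struct page *page)
{
	/* low 4 bits of page->private */
	enum bootmem_type type = bootmem_type(page);
	/* remaining upper bits */
	unsigned long info = bootmem_info(page);

	pr_debug("bootmem page: type=%d info=%lu\n", (int)type, info);
}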
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NETDEV_RX_QUEUE_H
#define _LINUX_NETDEV_RX_QUEUE_H

#include <linux/kobject.h>
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <net/xdp.h>
#include <net/page_pool/types.h>

/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
	struct xdp_rxq_info		xdp_rxq;
#ifdef CONFIG_RPS
	struct rps_map __rcu		*rps_map;
	struct rps_dev_flow_table __rcu	*rps_flow_table;
#endif
	struct kobject			kobj;
	const struct attribute_group	**groups;
	struct net_device		*dev;
	netdevice_tracker		dev_tracker;

	/* All fields below are "ops protected",
	 * see comment about net_device::lock
	 */
#ifdef CONFIG_XDP_SOCKETS
	struct xsk_buff_pool		*pool;
#endif
	struct napi_struct		*napi;
	struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;

/*
 * RX queue sysfs structures and functions.
 */
struct rx_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
	ssize_t (*store)(struct netdev_rx_queue *queue,
			 const char *buf, size_t len);
};

static inline struct netdev_rx_queue *
__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
{
	return dev->_rx + rxq;
}

static inline unsigned int
get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
{
	struct net_device *dev = queue->dev;
	int index = queue - dev->_rx;

	BUG_ON(index >= dev->num_rx_queues);
	return index;
}

int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);

#endif
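A small sketch of the pointer/index round trip implied by __netif_get_rx_queue() and get_netdev_rx_queue_index(): the queues live in the flat array dev->_rx, so indexing in and pointer-subtracting out are inverses. rxq_index_is_consistent is a hypothetical helper, not part of this header.

static inline bool rxq_index_is_consistent(struct net_device *dev,
					   unsigned int i)
{
	struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, i);

	/* queue pointers are laid out as a flat array in dev->_rx */
	return get_netdev_rx_queue_index(rxq) == i;
}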
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Sync File validation framework
 *
 * Copyright (C) 2012 Google, Inc.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/sync_file.h>

#include "sync_debug.h"

#define CREATE_TRACE_POINTS
#include "sync_trace.h"

/*
 * SW SYNC validation framework
 *
 * A sync object driver that uses a 32-bit counter to coordinate
 * synchronization.  Useful when there is no hardware primitive backing
 * the synchronization.
 *
 * To start the framework just open:
 *
 * <debugfs>/sync/sw_sync
 *
 * That will create a sync timeline; all fences created under this timeline
 * file descriptor will belong to this timeline.
 *
 * The 'sw_sync' file can be opened many times to create different
 * timelines.
 *
 * Fences can be created with the SW_SYNC_IOC_CREATE_FENCE ioctl, with
 * struct sw_sync_create_fence_data as the parameter.
 *
 * To increment the timeline counter, the SW_SYNC_IOC_INC ioctl should be
 * used with the increment as a u32. This will update the last signaled
 * value of the timeline and signal any fence that has a seqno smaller or
 * equal
 * to it.
* * struct sw_sync_create_fence_data * @value: the seqno to initialise the fence with * @name: the name of the new sync point * @fence: return the fd of the new sync_file with the created fence */ struct sw_sync_create_fence_data { __u32 value; char name[32]; __s32 fence; /* fd of new fence */ }; /** * struct sw_sync_get_deadline - get the deadline hint of a sw_sync fence * @deadline_ns: absolute time of the deadline * @pad: must be zero * @fence_fd: the sw_sync fence fd (in) * * Return the earliest deadline set on the fence. The timebase for the * deadline is CLOCK_MONOTONIC (same as vblank). If there is no deadline * set on the fence, this ioctl will return -ENOENT. */ struct sw_sync_get_deadline { __u64 deadline_ns; __u32 pad; __s32 fence_fd; }; #define SW_SYNC_IOC_MAGIC 'W' #define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\ struct sw_sync_create_fence_data) #define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32) #define SW_SYNC_GET_DEADLINE _IOWR(SW_SYNC_IOC_MAGIC, 2, \ struct sw_sync_get_deadline) #define SW_SYNC_HAS_DEADLINE_BIT DMA_FENCE_FLAG_USER_BITS static const struct dma_fence_ops timeline_fence_ops; static inline struct sync_pt *dma_fence_to_sync_pt(struct dma_fence *fence) { if (fence->ops != &timeline_fence_ops) return NULL; return container_of(fence, struct sync_pt, base); } /** * sync_timeline_create() - creates a sync object * @name: sync_timeline name * * Creates a new sync_timeline. Returns the sync_timeline object or NULL in * case of error. */ static struct sync_timeline *sync_timeline_create(const char *name) { struct sync_timeline *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) return NULL; kref_init(&obj->kref); obj->context = dma_fence_context_alloc(1); strscpy(obj->name, name, sizeof(obj->name)); obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock); sync_timeline_debug_add(obj); return obj; } static void sync_timeline_free(struct kref *kref) { struct sync_timeline *obj = container_of(kref, struct sync_timeline, kref); sync_timeline_debug_remove(obj); kfree(obj); } static void sync_timeline_get(struct sync_timeline *obj) { kref_get(&obj->kref); } static void sync_timeline_put(struct sync_timeline *obj) { kref_put(&obj->kref, sync_timeline_free); } static const char *timeline_fence_get_driver_name(struct dma_fence *fence) { return "sw_sync"; } static const char *timeline_fence_get_timeline_name(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); return parent->name; } static void timeline_fence_release(struct dma_fence *fence) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); struct sync_timeline *parent = dma_fence_parent(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); if (!list_empty(&pt->link)) { list_del(&pt->link); rb_erase(&pt->node, &parent->pt_tree); } spin_unlock_irqrestore(fence->lock, flags); sync_timeline_put(parent); dma_fence_free(fence); } static bool timeline_fence_signaled(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); return !__dma_fence_is_later(fence, fence->seqno, parent->value); } static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { if (ktime_before(deadline, pt->deadline)) pt->deadline = deadline; } else { pt->deadline = deadline; __set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags); } 
	spin_unlock_irqrestore(fence->lock, flags);
}

static const struct dma_fence_ops timeline_fence_ops = {
	.get_driver_name = timeline_fence_get_driver_name,
	.get_timeline_name = timeline_fence_get_timeline_name,
	.signaled = timeline_fence_signaled,
	.release = timeline_fence_release,
	.set_deadline = timeline_fence_set_deadline,
};

/**
 * sync_timeline_signal() - signal a status change on a sync_timeline
 * @obj:	sync_timeline to signal
 * @inc:	num to increment on timeline->value
 *
 * A sync implementation should call this any time one of its fences
 * has signaled or has an error condition.
 */
static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
{
	LIST_HEAD(signalled);
	struct sync_pt *pt, *next;

	trace_sync_timeline(obj);

	spin_lock_irq(&obj->lock);

	obj->value += inc;

	list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
		if (!timeline_fence_signaled(&pt->base))
			break;

		dma_fence_get(&pt->base);

		list_move_tail(&pt->link, &signalled);
		rb_erase(&pt->node, &obj->pt_tree);

		dma_fence_signal_locked(&pt->base);
	}

	spin_unlock_irq(&obj->lock);

	list_for_each_entry_safe(pt, next, &signalled, link) {
		list_del_init(&pt->link);
		dma_fence_put(&pt->base);
	}
}

/**
 * sync_pt_create() - creates a sync pt
 * @obj:	parent sync_timeline
 * @value:	value of the fence
 *
 * Creates a new sync_pt (fence) as a child of @obj.  Returns the sync_pt
 * object or NULL in case of error.
 */
static struct sync_pt *sync_pt_create(struct sync_timeline *obj,
				      unsigned int value)
{
	struct sync_pt *pt;

	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
	if (!pt)
		return NULL;

	sync_timeline_get(obj);
	dma_fence_init(&pt->base, &timeline_fence_ops, &obj->lock,
		       obj->context, value);
	INIT_LIST_HEAD(&pt->link);

	spin_lock_irq(&obj->lock);
	if (!dma_fence_is_signaled_locked(&pt->base)) {
		struct rb_node **p = &obj->pt_tree.rb_node;
		struct rb_node *parent = NULL;

		while (*p) {
			struct sync_pt *other;
			int cmp;

			parent = *p;
			other = rb_entry(parent, typeof(*pt), node);
			cmp = value - other->base.seqno;
			if (cmp > 0) {
				p = &parent->rb_right;
			} else if (cmp < 0) {
				p = &parent->rb_left;
			} else {
				if (dma_fence_get_rcu(&other->base)) {
					sync_timeline_put(obj);
					kfree(pt);
					pt = other;
					goto unlock;
				}
				p = &parent->rb_left;
			}
		}
		rb_link_node(&pt->node, parent, p);
		rb_insert_color(&pt->node, &obj->pt_tree);

		parent = rb_next(&pt->node);
		list_add_tail(&pt->link, parent ?
			      &rb_entry(parent, typeof(*pt), node)->link :
			      &obj->pt_list);
	}
unlock:
	spin_unlock_irq(&obj->lock);

	return pt;
}

/*
 * *WARNING*
 *
 * Improper use of this can result in deadlocking kernel drivers from userspace.
*/ /* opening sw_sync create a new sync obj */ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) { struct sync_timeline *obj; char task_comm[TASK_COMM_LEN]; get_task_comm(task_comm, current); obj = sync_timeline_create(task_comm); if (!obj) return -ENOMEM; file->private_data = obj; return 0; } static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data; struct sync_pt *pt, *next; spin_lock_irq(&obj->lock); list_for_each_entry_safe(pt, next, &obj->pt_list, link) { dma_fence_set_error(&pt->base, -ENOENT); dma_fence_signal_locked(&pt->base); } spin_unlock_irq(&obj->lock); sync_timeline_put(obj); return 0; } static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, unsigned long arg) { int fd = get_unused_fd_flags(O_CLOEXEC); int err; struct sync_pt *pt; struct sync_file *sync_file; struct sw_sync_create_fence_data data; if (fd < 0) return fd; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) { err = -EFAULT; goto err; } pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err; } sync_file = sync_file_create(&pt->base); dma_fence_put(&pt->base); if (!sync_file) { err = -ENOMEM; goto err; } data.fence = fd; if (copy_to_user((void __user *)arg, &data, sizeof(data))) { fput(sync_file->file); err = -EFAULT; goto err; } fd_install(fd, sync_file->file); return 0; err: put_unused_fd(fd); return err; } static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg) { u32 value; if (copy_from_user(&value, (void __user *)arg, sizeof(value))) return -EFAULT; while (value > INT_MAX) { sync_timeline_signal(obj, INT_MAX); value -= INT_MAX; } sync_timeline_signal(obj, value); return 0; } static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long arg) { struct sw_sync_get_deadline data; struct dma_fence *fence; unsigned long flags; struct sync_pt *pt; int ret = 0; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) return -EFAULT; if (data.deadline_ns || data.pad) return -EINVAL; fence = sync_file_get_fence(data.fence_fd); if (!fence) return -EINVAL; pt = dma_fence_to_sync_pt(fence); if (!pt) { ret = -EINVAL; goto put_fence; } spin_lock_irqsave(fence->lock, flags); if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { ret = -ENOENT; goto unlock; } data.deadline_ns = ktime_to_ns(pt->deadline); spin_unlock_irqrestore(fence->lock, flags); dma_fence_put(fence); if (ret) return ret; if (copy_to_user((void __user *)arg, &data, sizeof(data))) return -EFAULT; return 0; unlock: spin_unlock_irqrestore(fence->lock, flags); put_fence: dma_fence_put(fence); return ret; } static long sw_sync_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct sync_timeline *obj = file->private_data; switch (cmd) { case SW_SYNC_IOC_CREATE_FENCE: return sw_sync_ioctl_create_fence(obj, arg); case SW_SYNC_IOC_INC: return sw_sync_ioctl_inc(obj, arg); case SW_SYNC_GET_DEADLINE: return sw_sync_ioctl_get_deadline(obj, arg); default: return -ENOTTY; } } const struct file_operations sw_sync_debugfs_fops = { .open = sw_sync_debugfs_open, .release = sw_sync_debugfs_release, .unlocked_ioctl = sw_sync_ioctl, .compat_ioctl = compat_ptr_ioctl, }; |
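A userspace sketch of the ioctl flow documented at the top of this file, assuming debugfs is mounted at /sys/kernel/debug and that the struct and ioctl definitions above have been copied into the program; sw_sync_demo is an illustrative name only.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

static int sw_sync_demo(void)
{
	struct sw_sync_create_fence_data data = { .value = 1 };
	__u32 inc = 1;
	int tl;

	tl = open("/sys/kernel/debug/sync/sw_sync", O_RDWR);
	if (tl < 0)
		return -1;

	strcpy(data.name, "demo_pt");
	if (ioctl(tl, SW_SYNC_IOC_CREATE_FENCE, &data) < 0) {
		close(tl);
		return -1;
	}

	/* data.fence now holds a sync_file fd; seqno 1 is not yet signaled */
	ioctl(tl, SW_SYNC_IOC_INC, &inc);	/* timeline 0 -> 1, signals it */

	close(data.fence);
	close(tl);
	return 0;
}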
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _LINUX_IOV_ITER_H
#define _LINUX_IOV_ITER_H

#include <linux/uio.h>
#include <linux/bvec.h>
#include <linux/folio_queue.h>

typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
			     void *priv, void *priv2);
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress,
			      size_t len, void *priv, void *priv2);

/*
 * Handle ITER_UBUF.
 */
static __always_inline
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_ustep_f step)
{
	void __user *base = iter->ubuf;
	size_t progress = 0, remain;

	remain = step(base + iter->iov_offset, 0, len, priv, priv2);
	progress = len - remain;
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_IOVEC.
 */
static __always_inline
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		     iov_ustep_f step)
{
	const struct iovec *p = iter->__iov;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t part = min(len, p->iov_len - skip);

		if (likely(part)) {
			remain = step(p->iov_base + skip, progress, part,
				      priv, priv2);
			consumed = part - remain;
			progress += consumed;
			skip += consumed;
			len -= consumed;
			if (skip < p->iov_len)
				break;
		}

		p++;
		skip = 0;
	} while (len);

	iter->nr_segs -= p - iter->__iov;
	iter->__iov = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_KVEC.
*/ static __always_inline size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { const struct kvec *p = iter->kvec; size_t progress = 0, skip = iter->iov_offset; do { size_t remain, consumed; size_t part = min(len, p->iov_len - skip); if (likely(part)) { remain = step(p->iov_base + skip, progress, part, priv, priv2); consumed = part - remain; progress += consumed; skip += consumed; len -= consumed; if (skip < p->iov_len) break; } p++; skip = 0; } while (len); iter->nr_segs -= p - iter->kvec; iter->kvec = p; iter->iov_offset = skip; iter->count -= progress; return progress; } /* * Handle ITER_BVEC. */ static __always_inline size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { const struct bio_vec *p = iter->bvec; size_t progress = 0, skip = iter->iov_offset; do { size_t remain, consumed; size_t offset = p->bv_offset + skip, part; void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); part = min3(len, (size_t)(p->bv_len - skip), (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); kunmap_local(kaddr); consumed = part - remain; len -= consumed; progress += consumed; skip += consumed; if (skip >= p->bv_len) { skip = 0; p++; } if (remain) break; } while (len); iter->nr_segs -= p - iter->bvec; iter->bvec = p; iter->iov_offset = skip; iter->count -= progress; return progress; } /* * Handle ITER_FOLIOQ. */ static __always_inline size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { const struct folio_queue *folioq = iter->folioq; unsigned int slot = iter->folioq_slot; size_t progress = 0, skip = iter->iov_offset; if (slot == folioq_nr_slots(folioq)) { /* The iterator may have been extended. */ folioq = folioq->next; slot = 0; } do { struct folio *folio = folioq_folio(folioq, slot); size_t part, remain = 0, consumed; size_t fsize; void *base; if (!folio) break; fsize = folioq_folio_size(folioq, slot); if (skip < fsize) { base = kmap_local_folio(folio, skip); part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); remain = step(base, progress, part, priv, priv2); kunmap_local(base); consumed = part - remain; len -= consumed; progress += consumed; skip += consumed; } if (skip >= fsize) { skip = 0; slot++; if (slot == folioq_nr_slots(folioq) && folioq->next) { folioq = folioq->next; slot = 0; } } if (remain) break; } while (len); iter->folioq_slot = slot; iter->folioq = folioq; iter->iov_offset = skip; iter->count -= progress; return progress; } /* * Handle ITER_XARRAY. 
 */
static __always_inline
size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv,
		      void *priv2, iov_step_f step)
{
	struct folio *folio;
	size_t progress = 0;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	XA_STATE(xas, iter->xarray, index);

	rcu_read_lock();
	xas_for_each(&xas, folio, ULONG_MAX) {
		size_t remain, consumed, offset, part, flen;

		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start + progress);
		flen = min(folio_size(folio) - offset, len);

		while (flen) {
			void *base = kmap_local_folio(folio, offset);

			part = min_t(size_t, flen,
				     PAGE_SIZE - offset_in_page(offset));
			remain = step(base, progress, part, priv, priv2);
			kunmap_local(base);

			consumed = part - remain;
			progress += consumed;
			len -= consumed;

			if (remain || len == 0)
				goto out;
			flen -= consumed;
			offset += consumed;
		}
	}

out:
	rcu_read_unlock();
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_DISCARD.
 */
static __always_inline
size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv,
		       void *priv2, iov_step_f step)
{
	size_t progress = len;

	iter->count -= progress;
	return progress;
}

/**
 * iterate_and_advance2 - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @priv2: More data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Iterate over the next part of an iterator, up to the specified length.  The
 * buffer is presented in segments, which for kernel iteration are broken up by
 * physical pages and mapped, with the mapped address being presented.
 *
 * Two step functions, @step and @ustep, must be provided: one for handling
 * mapped kernel addresses and one that is given user addresses, which have
 * the potential to fault since no pinning is performed.
 *
 * The step functions are passed the address and length of the segment, @priv,
 * @priv2 and the amount of data so far iterated over (which can, for example,
 * be added to @priv to point to the right part of a second buffer).  The step
 * functions should return the amount of the segment they didn't process (ie. 0
 * indicates complete processing).
 *
 * This function returns the amount of data processed (ie. 0 means nothing was
 * processed and the value of @len means processed to completion).
 */
static __always_inline
size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
			    void *priv2, iov_ustep_f ustep, iov_step_f step)
{
	if (unlikely(iter->count < len))
		len = iter->count;
	if (unlikely(!len))
		return 0;

	if (likely(iter_is_ubuf(iter)))
		return iterate_ubuf(iter, len, priv, priv2, ustep);
	if (likely(iter_is_iovec(iter)))
		return iterate_iovec(iter, len, priv, priv2, ustep);
	if (iov_iter_is_bvec(iter))
		return iterate_bvec(iter, len, priv, priv2, step);
	if (iov_iter_is_kvec(iter))
		return iterate_kvec(iter, len, priv, priv2, step);
	if (iov_iter_is_folioq(iter))
		return iterate_folioq(iter, len, priv, priv2, step);
	if (iov_iter_is_xarray(iter))
		return iterate_xarray(iter, len, priv, priv2, step);
	return iterate_discard(iter, len, priv, priv2, step);
}

/**
 * iterate_and_advance - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * As iterate_and_advance2(), but priv2 is always NULL.
 */
static __always_inline
size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
			   iov_ustep_f ustep, iov_step_f step)
{
	return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
}

/**
 * iterate_and_advance_kernel - Iterate over a kernel-internal iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @priv2: More data for the step functions.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Iterate over the next part of an iterator, up to the specified length.  The
 * buffer is presented in segments, which for kernel iteration are broken up by
 * physical pages and mapped, with the mapped address being presented.
 *
 * [!] Note: This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type
 * iterators; it will not handle UBUF or IOVEC-type iterators.
 *
 * A single step function, @step, must be provided; it is always given mapped
 * kernel addresses.
 *
 * The step function is passed the address and length of the segment, @priv,
 * @priv2 and the amount of data so far iterated over (which can, for example,
 * be added to @priv to point to the right part of a second buffer).  The step
 * function should return the amount of the segment it didn't process (ie. 0
 * indicates complete processing).
 *
 * This function returns the amount of data processed (ie. 0 means nothing was
 * processed and the value of @len means processed to completion).
 */
static __always_inline
size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len,
				  void *priv, void *priv2, iov_step_f step)
{
	if (unlikely(iter->count < len))
		len = iter->count;
	if (unlikely(!len))
		return 0;
	if (iov_iter_is_bvec(iter))
		return iterate_bvec(iter, len, priv, priv2, step);
	if (iov_iter_is_kvec(iter))
		return iterate_kvec(iter, len, priv, priv2, step);
	if (iov_iter_is_folioq(iter))
		return iterate_folioq(iter, len, priv, priv2, step);
	if (iov_iter_is_xarray(iter))
		return iterate_xarray(iter, len, priv, priv2, step);
	return iterate_discard(iter, len, priv, priv2, step);
}

#endif /* _LINUX_IOV_ITER_H */
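A sketch of a step function driving iterate_and_advance_kernel(): copy_step flattens each mapped segment into a caller-supplied buffer passed via @priv. The names copy_step and copy_to_flat_buf are illustrative, not part of this header.

#include <linux/string.h>

/* Called once per mapped segment; 'progress' is bytes already copied. */
static size_t copy_step(void *iter_base, size_t progress, size_t len,
			void *priv, void *priv2)
{
	memcpy((char *)priv + progress, iter_base, len);
	return 0;	/* 0 => the whole segment was consumed */
}

static size_t copy_to_flat_buf(struct iov_iter *iter, void *buf, size_t len)
{
	/* only BVEC/KVEC/FOLIOQ/XARRAY/DISCARD iterators are handled */
	return iterate_and_advance_kernel(iter, len, buf, NULL, copy_step);
}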
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AppArmor security module
 *
 * This file contains AppArmor LSM hooks.
 *
 * Copyright (C) 1998-2008 Novell/SUSE
 * Copyright 2009-2010 Canonical Ltd.
*/ #include <linux/lsm_hooks.h> #include <linux/moduleparam.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/ptrace.h> #include <linux/ctype.h> #include <linux/sysctl.h> #include <linux/audit.h> #include <linux/user_namespace.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/zstd.h> #include <net/sock.h> #include <uapi/linux/mount.h> #include <uapi/linux/lsm.h> #include "include/af_unix.h" #include "include/apparmor.h" #include "include/apparmorfs.h" #include "include/audit.h" #include "include/capability.h" #include "include/cred.h" #include "include/crypto.h" #include "include/file.h" #include "include/ipc.h" #include "include/net.h" #include "include/path.h" #include "include/label.h" #include "include/policy.h" #include "include/policy_ns.h" #include "include/procattr.h" #include "include/mount.h" #include "include/secid.h" /* Flag indicating whether initialization completed */ int apparmor_initialized; union aa_buffer { struct list_head list; DECLARE_FLEX_ARRAY(char, buffer); }; struct aa_local_cache { unsigned int hold; unsigned int count; struct list_head head; }; #define RESERVE_COUNT 2 static int reserve_count = RESERVE_COUNT; static int buffer_count; static LIST_HEAD(aa_global_buffers); static DEFINE_SPINLOCK(aa_buffers_lock); static DEFINE_PER_CPU(struct aa_local_cache, aa_local_buffers); /* * LSM hook functions */ /* * put the associated labels */ static void apparmor_cred_free(struct cred *cred) { aa_put_label(cred_label(cred)); set_cred_label(cred, NULL); } /* * allocate the apparmor part of blank credentials */ static int apparmor_cred_alloc_blank(struct cred *cred, gfp_t gfp) { set_cred_label(cred, NULL); return 0; } /* * prepare new cred label for modification by prepare_cred block */ static int apparmor_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { set_cred_label(new, aa_get_newest_label(cred_label(old))); return 0; } /* * transfer the apparmor data to a blank set of creds */ static void apparmor_cred_transfer(struct cred *new, const struct cred *old) { set_cred_label(new, aa_get_newest_label(cred_label(old))); } static void apparmor_task_free(struct task_struct *task) { aa_free_task_ctx(task_ctx(task)); } static int apparmor_task_alloc(struct task_struct *task, u64 clone_flags) { struct aa_task_ctx *new = task_ctx(task); aa_dup_task_ctx(new, task_ctx(current)); return 0; } static int apparmor_ptrace_access_check(struct task_struct *child, unsigned int mode) { struct aa_label *tracer, *tracee; const struct cred *cred; int error; bool needput; cred = get_task_cred(child); tracee = cred_label(cred); /* ref count on cred */ tracer = __begin_current_label_crit_section(&needput); error = aa_may_ptrace(current_cred(), tracer, cred, tracee, (mode & PTRACE_MODE_READ) ? 
AA_PTRACE_READ : AA_PTRACE_TRACE); __end_current_label_crit_section(tracer, needput); put_cred(cred); return error; } static int apparmor_ptrace_traceme(struct task_struct *parent) { struct aa_label *tracer, *tracee; const struct cred *cred; int error; bool needput; tracee = __begin_current_label_crit_section(&needput); cred = get_task_cred(parent); tracer = cred_label(cred); /* ref count on cred */ error = aa_may_ptrace(cred, tracer, current_cred(), tracee, AA_PTRACE_TRACE); put_cred(cred); __end_current_label_crit_section(tracee, needput); return error; } /* Derived from security/commoncap.c:cap_capget */ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { struct aa_label *label; const struct cred *cred; rcu_read_lock(); cred = __task_cred(target); label = aa_get_newest_cred_label(cred); /* * cap_capget is stacked ahead of this and will * initialize effective and permitted. */ if (!unconfined(label)) { struct aa_profile *profile; struct label_it i; label_for_each_confined(i, label, profile) { kernel_cap_t allowed; allowed = aa_profile_capget(profile); *effective = cap_intersect(*effective, allowed); *permitted = cap_intersect(*permitted, allowed); } } rcu_read_unlock(); aa_put_label(label); return 0; } static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) { struct aa_label *label; int error = 0; label = aa_get_newest_cred_label(cred); if (!unconfined(label)) error = aa_capable(cred, label, cap, opts); aa_put_label(label); return error; } /** * common_perm - basic common permission check wrapper fn for paths * @op: operation being checked * @path: path to check permission of (NOT NULL) * @mask: requested permissions mask * @cond: conditional info for the permission request (NOT NULL) * * Returns: %0 else error code if error or permission denied */ static int common_perm(const char *op, const struct path *path, u32 mask, struct path_cond *cond) { struct aa_label *label; int error = 0; bool needput; label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_path_perm(op, current_cred(), label, path, 0, mask, cond); __end_current_label_crit_section(label, needput); return error; } /** * common_perm_cond - common permission wrapper around inode cond * @op: operation being checked * @path: location to check (NOT NULL) * @mask: requested permissions mask * * Returns: %0 else error code if error or permission denied */ static int common_perm_cond(const char *op, const struct path *path, u32 mask) { vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt), d_backing_inode(path->dentry)); struct path_cond cond = { vfsuid_into_kuid(vfsuid), d_backing_inode(path->dentry)->i_mode }; if (!path_mediated_fs(path->dentry)) return 0; return common_perm(op, path, mask, &cond); } /** * common_perm_dir_dentry - common permission wrapper when path is dir, dentry * @op: operation being checked * @dir: directory of the dentry (NOT NULL) * @dentry: dentry to check (NOT NULL) * @mask: requested permissions mask * @cond: conditional info for the permission request (NOT NULL) * * Returns: %0 else error code if error or permission denied */ static int common_perm_dir_dentry(const char *op, const struct path *dir, struct dentry *dentry, u32 mask, struct path_cond *cond) { struct path path = { .mnt = dir->mnt, .dentry = dentry }; return common_perm(op, &path, mask, cond); } /** * common_perm_rm - common permission wrapper for operations doing rm * 
@op: operation being checked * @dir: directory that the dentry is in (NOT NULL) * @dentry: dentry being rm'd (NOT NULL) * @mask: requested permission mask * * Returns: %0 else error code if error or permission denied */ static int common_perm_rm(const char *op, const struct path *dir, struct dentry *dentry, u32 mask) { struct inode *inode = d_backing_inode(dentry); struct path_cond cond = { }; vfsuid_t vfsuid; if (!inode || !path_mediated_fs(dentry)) return 0; vfsuid = i_uid_into_vfsuid(mnt_idmap(dir->mnt), inode); cond.uid = vfsuid_into_kuid(vfsuid); cond.mode = inode->i_mode; return common_perm_dir_dentry(op, dir, dentry, mask, &cond); } /** * common_perm_create - common permission wrapper for operations doing create * @op: operation being checked * @dir: directory that dentry will be created in (NOT NULL) * @dentry: dentry to create (NOT NULL) * @mask: request permission mask * @mode: created file mode * * Returns: %0 else error code if error or permission denied */ static int common_perm_create(const char *op, const struct path *dir, struct dentry *dentry, u32 mask, umode_t mode) { struct path_cond cond = { current_fsuid(), mode }; if (!path_mediated_fs(dir->dentry)) return 0; return common_perm_dir_dentry(op, dir, dentry, mask, &cond); } static int apparmor_path_unlink(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_UNLINK, dir, dentry, AA_MAY_DELETE); } static int apparmor_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { return common_perm_create(OP_MKDIR, dir, dentry, AA_MAY_CREATE, S_IFDIR); } static int apparmor_path_rmdir(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_RMDIR, dir, dentry, AA_MAY_DELETE); } static int apparmor_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode); } static int apparmor_path_truncate(const struct path *path) { return common_perm_cond(OP_TRUNC, path, MAY_WRITE | AA_MAY_SETATTR); } static int apparmor_file_truncate(struct file *file) { return apparmor_path_truncate(&file->f_path); } static int apparmor_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { return common_perm_create(OP_SYMLINK, dir, dentry, AA_MAY_CREATE, S_IFLNK); } static int apparmor_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct aa_label *label; int error = 0; if (!path_mediated_fs(old_dentry)) return 0; label = begin_current_label_crit_section(); if (!unconfined(label)) error = aa_path_link(current_cred(), label, old_dentry, new_dir, new_dentry); end_current_label_crit_section(label); return error; } static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, const unsigned int flags) { struct aa_label *label; int error = 0; if (!path_mediated_fs(old_dentry)) return 0; if ((flags & RENAME_EXCHANGE) && !path_mediated_fs(new_dentry)) return 0; label = begin_current_label_crit_section(); if (!unconfined(label)) { struct mnt_idmap *idmap = mnt_idmap(old_dir->mnt); vfsuid_t vfsuid; struct path old_path = { .mnt = old_dir->mnt, .dentry = old_dentry }; struct path new_path = { .mnt = new_dir->mnt, .dentry = new_dentry }; struct path_cond cond = { .mode = d_backing_inode(old_dentry)->i_mode }; vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry)); cond.uid = vfsuid_into_kuid(vfsuid); if (flags & RENAME_EXCHANGE) { struct path_cond 
cond_exchange = { .mode = d_backing_inode(new_dentry)->i_mode, }; vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry)); cond_exchange.uid = vfsuid_into_kuid(vfsuid); error = aa_path_perm(OP_RENAME_SRC, current_cred(), label, &new_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond_exchange); if (!error) error = aa_path_perm(OP_RENAME_DEST, current_cred(), label, &old_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond_exchange); } if (!error) error = aa_path_perm(OP_RENAME_SRC, current_cred(), label, &old_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond); if (!error) error = aa_path_perm(OP_RENAME_DEST, current_cred(), label, &new_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond); } end_current_label_crit_section(label); return error; } static int apparmor_path_chmod(const struct path *path, umode_t mode) { return common_perm_cond(OP_CHMOD, path, AA_MAY_CHMOD); } static int apparmor_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { return common_perm_cond(OP_CHOWN, path, AA_MAY_CHOWN); } static int apparmor_inode_getattr(const struct path *path) { return common_perm_cond(OP_GETATTR, path, AA_MAY_GETATTR); } static int apparmor_file_open(struct file *file) { struct aa_file_ctx *fctx = file_ctx(file); struct aa_label *label; int error = 0; bool needput; if (!path_mediated_fs(file->f_path.dentry)) return 0; /* If in exec, permission is handled by bprm hooks. * Cache permissions granted by the previous exec check, with * implicit read and executable mmap which are required to * actually execute the image. * * Illogically, FMODE_EXEC is in f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC) { fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP; return 0; } label = aa_get_newest_cred_label_condref(file->f_cred, &needput); if (!unconfined(label)) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); vfsuid_t vfsuid; struct path_cond cond = { .mode = inode->i_mode, }; vfsuid = i_uid_into_vfsuid(idmap, inode); cond.uid = vfsuid_into_kuid(vfsuid); error = aa_path_perm(OP_OPEN, file->f_cred, label, &file->f_path, 0, aa_map_file_to_perms(file), &cond); /* todo cache full allowed permissions set and state */ fctx->allow = aa_map_file_to_perms(file); } aa_put_label_condref(label, needput); return error; } static int apparmor_file_alloc_security(struct file *file) { struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; } static void apparmor_file_free_security(struct file *file) { struct aa_file_ctx *ctx = file_ctx(file); if (ctx) aa_put_label(rcu_access_pointer(ctx->label)); } static int common_file_perm(const char *op, struct file *file, u32 mask, bool in_atomic) { struct aa_label *label; int error = 0; bool needput; /* don't reaudit files closed during inheritance */ if (unlikely(file->f_path.dentry == aa_null.dentry)) return -EACCES; label = __begin_current_label_crit_section(&needput); error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic); __end_current_label_crit_section(label, needput); return error; } static int apparmor_file_receive(struct file *file) { return common_file_perm(OP_FRECEIVE, file, aa_map_file_to_perms(file), false); } static int apparmor_file_permission(struct file *file, int mask) { return common_file_perm(OP_FPERM, file, mask, false); } 
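/*
 * Editor's note - illustrative sketch, not part of the original file: the
 * path hooks above are thin wrappers that all reduce to the same layering,
 * common_perm_cond()/common_perm_dir_dentry() -> common_perm() ->
 * aa_path_perm(). A hypothetical hook mediating a new path operation would
 * follow the same pattern as apparmor_path_truncate(); OP_EXAMPLE below is
 * a made-up operation name used only for illustration.
 */
#if 0	/* sketch only - OP_EXAMPLE does not exist */
static int apparmor_path_example(const struct path *path)
{
	/*
	 * common_perm_cond() derives the uid/mode conditionals from the
	 * backing inode and skips filesystems not mediated by path name.
	 */
	return common_perm_cond(OP_EXAMPLE, path, MAY_READ | AA_MAY_GETATTR);
}
#endif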
static int apparmor_file_lock(struct file *file, unsigned int cmd) { u32 mask = AA_MAY_LOCK; if (cmd == F_WRLCK) mask |= MAY_WRITE; return common_file_perm(OP_FLOCK, file, mask, false); } static int common_mmap(const char *op, struct file *file, unsigned long prot, unsigned long flags, bool in_atomic) { int mask = 0; if (!file || !file_ctx(file)) return 0; if (prot & PROT_READ) mask |= MAY_READ; /* * Private mappings don't require write perms since they don't * write back to the files */ if ((prot & PROT_WRITE) && !(flags & MAP_PRIVATE)) mask |= MAY_WRITE; if (prot & PROT_EXEC) mask |= AA_EXEC_MMAP; return common_file_perm(op, file, mask, in_atomic); } static int apparmor_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { return common_mmap(OP_FMMAP, file, prot, flags, GFP_ATOMIC); } static int apparmor_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { return common_mmap(OP_FMPROT, vma->vm_file, prot, !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0, false); } #ifdef CONFIG_IO_URING static const char *audit_uring_mask(u32 mask) { if (mask & AA_MAY_CREATE_SQPOLL) return "sqpoll"; if (mask & AA_MAY_OVERRIDE_CRED) return "override_creds"; return ""; } static void audit_uring_cb(struct audit_buffer *ab, void *va) { struct apparmor_audit_data *ad = aad_of_va(va); if (ad->request & AA_URING_PERM_MASK) { audit_log_format(ab, " requested=\"%s\"", audit_uring_mask(ad->request)); if (ad->denied & AA_URING_PERM_MASK) { audit_log_format(ab, " denied=\"%s\"", audit_uring_mask(ad->denied)); } } if (ad->uring.target) { audit_log_format(ab, " tcontext="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->uring.target, FLAGS_NONE, GFP_ATOMIC); } } static int profile_uring(struct aa_profile *profile, u32 request, struct aa_label *new, int cap, struct apparmor_audit_data *ad) { unsigned int state; struct aa_ruleset *rules; int error = 0; AA_BUG(!profile); rules = profile->label.rules[0]; state = RULE_MEDIATES(rules, AA_CLASS_IO_URING); if (state) { struct aa_perms perms = { }; if (new) { aa_label_match(profile, rules, new, state, false, request, &perms); } else { perms = *aa_lookup_perms(rules->policy, state); } aa_apply_modes_to_perms(profile, &perms); error = aa_check_perms(profile, &perms, request, ad, audit_uring_cb); } return error; } /** * apparmor_uring_override_creds - check the requested cred override * @new: the target creds * * Check to see if the current task is allowed to override its credentials * to service an io_uring operation. */ static int apparmor_uring_override_creds(const struct cred *new) { struct aa_profile *profile; struct aa_label *label; int error; bool needput; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_OVERRIDE); ad.uring.target = cred_label(new); label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_OVERRIDE_CRED, cred_label(new), CAP_SYS_ADMIN, &ad)); __end_current_label_crit_section(label, needput); return error; } /** * apparmor_uring_sqpoll - check if an io_uring polling thread can be created * * Check to see if the current task is allowed to create a new io_uring * kernel polling thread.
*/ static int apparmor_uring_sqpoll(void) { struct aa_profile *profile; struct aa_label *label; int error; bool needput; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_SQPOLL); label = __begin_current_label_crit_section(&needput); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_CREATE_SQPOLL, NULL, CAP_SYS_ADMIN, &ad)); __end_current_label_crit_section(label, needput); return error; } #endif /* CONFIG_IO_URING */ static int apparmor_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { struct aa_label *label; int error = 0; bool needput; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; flags &= ~AA_MS_IGNORE_MASK; label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) { if (flags & MS_REMOUNT) error = aa_remount(current_cred(), label, path, flags, data); else if (flags & MS_BIND) error = aa_bind_mount(current_cred(), label, path, dev_name, flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) error = aa_mount_change_type(current_cred(), label, path, flags); else if (flags & MS_MOVE) error = aa_move_mount_old(current_cred(), label, path, dev_name); else error = aa_new_mount(current_cred(), label, dev_name, path, type, flags, data); } __end_current_label_crit_section(label, needput); return error; } static int apparmor_move_mount(const struct path *from_path, const struct path *to_path) { struct aa_label *label; int error = 0; bool needput; label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_move_mount(current_cred(), label, from_path, to_path); __end_current_label_crit_section(label, needput); return error; } static int apparmor_sb_umount(struct vfsmount *mnt, int flags) { struct aa_label *label; int error = 0; bool needput; label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_umount(current_cred(), label, mnt, flags); __end_current_label_crit_section(label, needput); return error; } static int apparmor_sb_pivotroot(const struct path *old_path, const struct path *new_path) { struct aa_label *label; int error = 0; label = aa_get_current_label(); if (!unconfined(label)) error = aa_pivotroot(current_cred(), label, old_path, new_path); aa_put_label(label); return error; } static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx, u32 *size, u32 flags) { int error = -ENOENT; struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; char *value = NULL; switch (attr) { case LSM_ATTR_CURRENT: label = aa_get_newest_label(cred_label(current_cred())); break; case LSM_ATTR_PREV: if (ctx->previous) label = aa_get_newest_label(ctx->previous); break; case LSM_ATTR_EXEC: if (ctx->onexec) label = aa_get_newest_label(ctx->onexec); break; default: error = -EOPNOTSUPP; break; } if (label) { error = aa_getprocattr(label, &value, false); if (error > 0) error = lsm_fill_user_ctx(lx, size, value, error, LSM_ID_APPARMOR, 0); kfree(value); } aa_put_label(label); if (error < 0) return error; return 1; } static int apparmor_getprocattr(struct task_struct *task, const char *name, char **value) { int error = -ENOENT; /* released below */ const struct cred *cred = get_task_cred(task); struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; if (strcmp(name, "current") == 0) label = aa_get_newest_label(cred_label(cred)); else if (strcmp(name, "prev") == 0 && ctx->previous) label = aa_get_newest_label(ctx->previous); else 
if (strcmp(name, "exec") == 0 && ctx->onexec) label = aa_get_newest_label(ctx->onexec); else error = -EINVAL; if (label) error = aa_getprocattr(label, value, true); aa_put_label(label); put_cred(cred); return error; } static int do_setattr(u64 attr, void *value, size_t size) { char *command, *largs = NULL, *args = value; size_t arg_size; int error; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, OP_SETPROCATTR); if (size == 0) return -EINVAL; /* AppArmor requires that the buffer must be null terminated atm */ if (args[size - 1] != '\0') { /* null terminate */ largs = args = kmalloc(size + 1, GFP_KERNEL); if (!args) return -ENOMEM; memcpy(args, value, size); args[size] = '\0'; } error = -EINVAL; args = strim(args); command = strsep(&args, " "); if (!args) goto out; args = skip_spaces(args); if (!*args) goto out; arg_size = size - (args - (largs ? largs : (char *) value)); if (attr == LSM_ATTR_CURRENT) { if (strcmp(command, "changehat") == 0) { error = aa_setprocattr_changehat(args, arg_size, AA_CHANGE_NOFLAGS); } else if (strcmp(command, "permhat") == 0) { error = aa_setprocattr_changehat(args, arg_size, AA_CHANGE_TEST); } else if (strcmp(command, "changeprofile") == 0) { error = aa_change_profile(args, AA_CHANGE_NOFLAGS); } else if (strcmp(command, "permprofile") == 0) { error = aa_change_profile(args, AA_CHANGE_TEST); } else if (strcmp(command, "stack") == 0) { error = aa_change_profile(args, AA_CHANGE_STACK); } else goto fail; } else if (attr == LSM_ATTR_EXEC) { if (strcmp(command, "exec") == 0) error = aa_change_profile(args, AA_CHANGE_ONEXEC); else if (strcmp(command, "stack") == 0) error = aa_change_profile(args, (AA_CHANGE_ONEXEC | AA_CHANGE_STACK)); else goto fail; } else /* only support the "current" and "exec" process attributes */ goto fail; if (!error) error = size; out: kfree(largs); return error; fail: ad.subj_label = begin_current_label_crit_section(); if (attr == LSM_ATTR_CURRENT) ad.info = "current"; else if (attr == LSM_ATTR_EXEC) ad.info = "exec"; else ad.info = "invalid"; ad.error = error = -EINVAL; aa_audit_msg(AUDIT_APPARMOR_DENIED, &ad, NULL); end_current_label_crit_section(ad.subj_label); goto out; } static int apparmor_setselfattr(unsigned int attr, struct lsm_ctx *ctx, u32 size, u32 flags) { int rc; if (attr != LSM_ATTR_CURRENT && attr != LSM_ATTR_EXEC) return -EOPNOTSUPP; rc = do_setattr(attr, ctx->ctx, ctx->ctx_len); if (rc > 0) return 0; return rc; } static int apparmor_setprocattr(const char *name, void *value, size_t size) { int attr = lsm_name_to_attr(name); if (attr) return do_setattr(attr, value, size); return -EINVAL; } /** * apparmor_bprm_committing_creds - do task cleanup on committing new creds * @bprm: binprm for the exec (NOT NULL) */ static void apparmor_bprm_committing_creds(const struct linux_binprm *bprm) { struct aa_label *label = aa_current_raw_label(); struct aa_label *new_label = cred_label(bprm->cred); /* bail out if unconfined or not changing profile */ if ((new_label->proxy == label->proxy) || (unconfined(new_label))) return; aa_inherit_files(bprm->cred, current->files); current->pdeath_signal = 0; /* reset soft limits and set hard limits for the new label */ __aa_transition_rlimits(label, new_label); } /** * apparmor_bprm_committed_creds() - do cleanup after new creds committed * @bprm: binprm for the exec (NOT NULL) */ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) { /* clear out temporary/transitional state from the context */ aa_clear_task_ctx_trans(task_ctx(current)); return; } static void 
apparmor_current_getlsmprop_subj(struct lsm_prop *prop) { struct aa_label *label; bool needput; label = __begin_current_label_crit_section(&needput); prop->apparmor.label = label; __end_current_label_crit_section(label, needput); } static void apparmor_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop) { struct aa_label *label = aa_get_task_label(p); prop->apparmor.label = label; aa_put_label(label); } static int apparmor_task_setrlimit(struct task_struct *task, unsigned int resource, struct rlimit *new_rlim) { struct aa_label *label; int error = 0; bool needput; label = __begin_current_label_crit_section(&needput); if (!unconfined(label)) error = aa_task_setrlimit(current_cred(), label, task, resource, new_rlim); __end_current_label_crit_section(label, needput); return error; } static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo *info, int sig, const struct cred *cred) { const struct cred *tc; struct aa_label *cl, *tl; int error; bool needput; tc = get_task_cred(target); tl = aa_get_newest_cred_label(tc); if (cred) { /* * Dealing with USB IO specific behavior */ cl = aa_get_newest_cred_label(cred); error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); } else { cl = __begin_current_label_crit_section(&needput); error = aa_may_signal(current_cred(), cl, tc, tl, sig); __end_current_label_crit_section(cl, needput); } aa_put_label(tl); put_cred(tc); return error; } static int apparmor_userns_create(const struct cred *cred) { struct aa_label *label; struct aa_profile *profile; int error = 0; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_NS, OP_USERNS_CREATE); ad.subj_cred = current_cred(); label = begin_current_label_crit_section(); if (!unconfined(label)) { error = fn_for_each(label, profile, aa_profile_ns_perm(profile, &ad, AA_USERNS_CREATE)); } end_current_label_crit_section(label); return error; } static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t gfp) { struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_label *label; bool needput; label = __begin_current_label_crit_section(&needput); //spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); rcu_assign_pointer(ctx->peer, NULL); rcu_assign_pointer(ctx->peer_lastupdate, NULL); __end_current_label_crit_section(label, needput); return 0; } static void apparmor_sk_free_security(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); /* dead these won't be updated any more */ aa_put_label(rcu_dereference_protected(ctx->label, true)); aa_put_label(rcu_dereference_protected(ctx->peer, true)); aa_put_label(rcu_dereference_protected(ctx->peer_lastupdate, true)); } /** * apparmor_sk_clone_security - clone the sk_security field * @sk: sock to have security cloned * @newsk: sock getting clone */ static void apparmor_sk_clone_security(const struct sock *sk, struct sock *newsk) { struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_sk_ctx *new = aa_sock(newsk); /* not actually in use yet */ if (rcu_access_pointer(ctx->label) != rcu_access_pointer(new->label)) { aa_put_label(rcu_dereference_protected(new->label, true)); rcu_assign_pointer(new->label, aa_get_label_rcu(&ctx->label)); } if (rcu_access_pointer(ctx->peer) != rcu_access_pointer(new->peer)) { aa_put_label(rcu_dereference_protected(new->peer, true)); rcu_assign_pointer(new->peer, aa_get_label_rcu(&ctx->peer)); } if (rcu_access_pointer(ctx->peer_lastupdate) != rcu_access_pointer(new->peer_lastupdate)) { aa_put_label(rcu_dereference_protected(new->peer_lastupdate, true)); 
rcu_assign_pointer(new->peer_lastupdate, aa_get_label_rcu(&ctx->peer_lastupdate)); } } static int unix_connect_perm(const struct cred *cred, struct aa_label *label, struct sock *sk, struct sock *peer_sk) { struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); int error; error = aa_unix_peer_perm(cred, label, OP_CONNECT, (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE), sk, peer_sk, rcu_dereference_protected(peer_ctx->label, lockdep_is_held(&unix_sk(peer_sk)->lock))); if (!is_unix_fs(peer_sk)) { last_error(error, aa_unix_peer_perm(cred, rcu_dereference_protected(peer_ctx->label, lockdep_is_held(&unix_sk(peer_sk)->lock)), OP_CONNECT, (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE), peer_sk, sk, label)); } return error; } /* lockdep check in unix_connect_perm - push sks here to check */ static void unix_connect_peers(struct aa_sk_ctx *sk_ctx, struct aa_sk_ctx *peer_ctx) { /* Cross reference the peer labels for SO_PEERSEC */ struct aa_label *label = rcu_dereference_protected(sk_ctx->label, true); aa_get_label(label); aa_put_label(rcu_dereference_protected(peer_ctx->peer, true)); rcu_assign_pointer(peer_ctx->peer, label); /* transfer cnt */ label = aa_get_label(rcu_dereference_protected(peer_ctx->label, true)); //spin_unlock(&peer_ctx->lock); //spin_lock(&sk_ctx->lock); aa_put_label(rcu_dereference_protected(sk_ctx->peer, true)); aa_put_label(rcu_dereference_protected(sk_ctx->peer_lastupdate, true)); rcu_assign_pointer(sk_ctx->peer, aa_get_label(label)); rcu_assign_pointer(sk_ctx->peer_lastupdate, label); /* transfer cnt */ //spin_unlock(&sk_ctx->lock); } /** * apparmor_unix_stream_connect - check perms before making unix domain conn * @sk: sk attempting to connect * @peer_sk: sk that is accepting the connection * @newsk: new sk created for this connection * peer is locked when this hook is called * * Return: * 0 if connection is permitted * error code on denial or failure */ static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk, struct sock *newsk) { struct aa_sk_ctx *sk_ctx = aa_sock(sk); struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk); struct aa_sk_ctx *new_ctx = aa_sock(newsk); struct aa_label *label; int error; bool needput; label = __begin_current_label_crit_section(&needput); error = unix_connect_perm(current_cred(), label, sk, peer_sk); __end_current_label_crit_section(label, needput); if (error) return error; /* newsk doesn't go through post_create, but does go through * security_sk_alloc() */ rcu_assign_pointer(new_ctx->label, aa_get_label(rcu_dereference_protected(peer_ctx->label, true))); /* Cross reference the peer labels for SO_PEERSEC */ unix_connect_peers(sk_ctx, new_ctx); return 0; } /** * apparmor_unix_may_send - check perms before conn or sending unix dgrams * @sock: socket sending the message * @peer: socket the message is being sent to * * Performs bidirectional permission checks for Unix domain socket communication: * 1. Verifies sender has AA_MAY_SEND to target socket * 2.
Verifies receiver has AA_MAY_RECEIVE from source socket * * sock and peer are locked when this hook is called * called by: dgram_connect peer setup but path not copied to newsk * * Return: * 0 if transmission is permitted * error code on denial or failure */ static int apparmor_unix_may_send(struct socket *sock, struct socket *peer) { struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk); struct aa_label *label; int error; bool needput; label = __begin_current_label_crit_section(&needput); error = xcheck(aa_unix_peer_perm(current_cred(), label, OP_SENDMSG, AA_MAY_SEND, sock->sk, peer->sk, rcu_dereference_protected(peer_ctx->label, true)), aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL, rcu_dereference_protected(peer_ctx->label, true), OP_SENDMSG, AA_MAY_RECEIVE, peer->sk, sock->sk, label)); __end_current_label_crit_section(label, needput); return error; } static int apparmor_socket_create(int family, int type, int protocol, int kern) { struct aa_label *label; int error = 0; AA_BUG(in_interrupt()); if (kern) return 0; label = begin_current_label_crit_section(); if (!unconfined(label)) { if (family == PF_UNIX) error = aa_unix_create_perm(label, family, type, protocol); else error = aa_af_perm(current_cred(), label, OP_CREATE, AA_MAY_CREATE, family, type, protocol); } end_current_label_crit_section(label); return error; } /** * apparmor_socket_post_create - setup the per-socket security struct * @sock: socket that is being setup * @family: family of socket being created * @type: type of the socket * @protocol: protocol of the socket * @kern: socket is a special kernel socket * * Note: * - kernel sockets labeled kernel_t used to use unconfined * - socket may not have sk here if created with sock_create_lite or * sock_alloc. These should be accept cases which will be handled in * sock_graft. */ static int apparmor_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern) { struct aa_label *label; if (kern) { label = aa_get_label(kernel_t); } else label = aa_get_current_label(); if (sock->sk) { struct aa_sk_ctx *ctx = aa_sock(sock->sk); /* still not live */ aa_put_label(rcu_dereference_protected(ctx->label, true)); rcu_assign_pointer(ctx->label, aa_get_label(label)); } aa_put_label(label); return 0; } static int apparmor_socket_socketpair(struct socket *socka, struct socket *sockb) { struct aa_sk_ctx *a_ctx = aa_sock(socka->sk); struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk); struct aa_label *label; /* socks not live yet - initial values set in sk_alloc */ label = begin_current_label_crit_section(); if (rcu_access_pointer(a_ctx->label) != label) { AA_BUG("a_ctx != label"); aa_put_label(rcu_dereference_protected(a_ctx->label, true)); rcu_assign_pointer(a_ctx->label, aa_get_label(label)); } if (rcu_access_pointer(b_ctx->label) != label) { AA_BUG("b_ctx != label"); aa_put_label(rcu_dereference_protected(b_ctx->label, true)); rcu_assign_pointer(b_ctx->label, aa_get_label(label)); } if (socka->sk->sk_family == PF_UNIX) { /* unix socket pairs by-pass unix_stream_connect */ unix_connect_peers(a_ctx, b_ctx); } end_current_label_crit_section(label); return 0; } /** * apparmor_socket_bind - check perms before bind addr to socket * @sock: socket to bind the address to (must be non-NULL) * @address: address that is being bound (must be non-NULL) * @addrlen: length of @address * * Performs security checks before allowing a socket to bind to an address. * Handles Unix domain sockets specially through aa_unix_bind_perm(). 
* For other socket families, uses generic permission check via aa_sk_perm(). * * Return: * 0 if binding is permitted * error code on denial or invalid parameters */ static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!address); AA_BUG(in_interrupt()); if (sock->sk->sk_family == PF_UNIX) return aa_unix_bind_perm(sock, address, addrlen); return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); } static int apparmor_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!address); AA_BUG(in_interrupt()); /* PF_UNIX goes through unix_stream_connect && unix_may_send */ if (sock->sk->sk_family == PF_UNIX) return 0; return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); } static int apparmor_socket_listen(struct socket *sock, int backlog) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); if (sock->sk->sk_family == PF_UNIX) return aa_unix_listen_perm(sock, backlog); return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); } /* * Note: while @newsock is created and has some information, the accept * has not been done. */ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!newsock); AA_BUG(in_interrupt()); if (sock->sk->sk_family == PF_UNIX) return aa_unix_accept_perm(sock, newsock); return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); } static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, struct msghdr *msg, int size) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!msg); AA_BUG(in_interrupt()); /* PF_UNIX goes through unix_may_send */ if (sock->sk->sk_family == PF_UNIX) return 0; return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); } static int apparmor_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size); } /* revalidation, get/set attr, shutdown */ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); if (sock->sk->sk_family == PF_UNIX) return aa_unix_sock_perm(op, request, sock); return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockname(struct socket *sock) { return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); } static int apparmor_socket_getpeername(struct socket *sock) { return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); } /* revalidation, get/set attr, opt */ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, int level, int optname) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); if (sock->sk->sk_family == PF_UNIX) return aa_unix_opt_perm(op, request, sock, level, optname); return aa_sk_perm(op, request, sock->sk); } static int apparmor_socket_getsockopt(struct socket *sock, int level, int optname) { return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock, level, optname); } static int apparmor_socket_setsockopt(struct socket *sock, int level, int optname) { return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock, level, optname); } static int apparmor_socket_shutdown(struct socket *sock, int how) { return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); } #ifdef CONFIG_NETWORK_SECMARK /** * apparmor_socket_sock_rcv_skb - check perms before associating skb
to sk * @sk: sk to associate @skb with * @skb: skb to check for perms * * Note: cannot sleep; may be called with locks held. * * We don't want protocol-specific checks in __skb_recv_datagram(), so an * incoming connection is denied here, in socket_sock_rcv_skb(). */ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct aa_sk_ctx *ctx = aa_sock(sk); int error; if (!skb->secmark) return 0; /* * If we reach here before the socket_post_create hook is called, the * label is NULL, so drop the packet. */ if (!rcu_access_pointer(ctx->label)) return -EACCES; rcu_read_lock(); error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_RECVMSG, AA_MAY_RECEIVE, skb->secmark, sk); rcu_read_unlock(); return error; } #endif static struct aa_label *sk_peer_get_label(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_label *label = ERR_PTR(-ENOPROTOOPT); if (rcu_access_pointer(ctx->peer)) return aa_get_label_rcu(&ctx->peer); if (sk->sk_family != PF_UNIX) return ERR_PTR(-ENOPROTOOPT); return label; } /** * apparmor_socket_getpeersec_stream - get security context of peer * @sock: socket that we are trying to get the peer context of * @optval: output - buffer to copy peer name to * @optlen: output - size of copied name in @optval * @len: size of @optval buffer * Returns: 0 on success, -errno on failure * * Note: for TCP, only valid if using IPsec or CIPSO on the LAN */ static int apparmor_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { char *name = NULL; int slen, error = 0; struct aa_label *label; struct aa_label *peer; peer = sk_peer_get_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; } label = begin_current_label_crit_section(); slen = aa_label_asxprint(&name, labels_ns(label), peer, FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); /* don't include terminating \0 in slen, it breaks some apps */ if (slen < 0) { error = -ENOMEM; goto done_put; } if (slen > len) { error = -ERANGE; goto done_len; } if (copy_to_sockptr(optval, name, slen)) error = -EFAULT; done_len: if (copy_to_sockptr(optlen, &slen, sizeof(slen))) error = -EFAULT; done_put: end_current_label_crit_section(label); aa_put_label(peer); done: kfree(name); return error; } /** * apparmor_socket_getpeersec_dgram - get security label of packet * @sock: the peer socket * @skb: packet data * @secid: pointer to where to put the secid of the packet * * Sets the netlabel socket state on sk from parent */ static int apparmor_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { /* TODO: requires secid support */ return -ENOPROTOOPT; } /** * apparmor_sock_graft - Initialize newly created socket * @sk: child sock * @parent: parent socket * * Note: could set off of SOCK_CTX(parent) but need to track inode and we can * just set sk security information off of current creating process label * Labeling of sk for accept case - probably should be sock based * instead of task, because of the case where an implicitly labeled * socket is shared by different tasks.
*/ static void apparmor_sock_graft(struct sock *sk, struct socket *parent) { struct aa_sk_ctx *ctx = aa_sock(sk); /* setup - not live */ if (!rcu_access_pointer(ctx->label)) rcu_assign_pointer(ctx->label, aa_get_current_label()); } #ifdef CONFIG_NETWORK_SECMARK static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct aa_sk_ctx *ctx = aa_sock(sk); int error; if (!skb->secmark) return 0; rcu_read_lock(); error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_CONNECT, AA_MAY_CONNECT, skb->secmark, sk); rcu_read_unlock(); return error; } #endif /* * The cred blob is a pointer to, not an instance of, an aa_label. */ struct lsm_blob_sizes apparmor_blob_sizes __ro_after_init = { .lbs_cred = sizeof(struct aa_label *), .lbs_file = sizeof(struct aa_file_ctx), .lbs_task = sizeof(struct aa_task_ctx), .lbs_sock = sizeof(struct aa_sk_ctx), }; static const struct lsm_id apparmor_lsmid = { .name = "apparmor", .id = LSM_ID_APPARMOR, }; static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), LSM_HOOK_INIT(capget, apparmor_capget), LSM_HOOK_INIT(capable, apparmor_capable), LSM_HOOK_INIT(move_mount, apparmor_move_mount), LSM_HOOK_INIT(sb_mount, apparmor_sb_mount), LSM_HOOK_INIT(sb_umount, apparmor_sb_umount), LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot), LSM_HOOK_INIT(path_link, apparmor_path_link), LSM_HOOK_INIT(path_unlink, apparmor_path_unlink), LSM_HOOK_INIT(path_symlink, apparmor_path_symlink), LSM_HOOK_INIT(path_mkdir, apparmor_path_mkdir), LSM_HOOK_INIT(path_rmdir, apparmor_path_rmdir), LSM_HOOK_INIT(path_mknod, apparmor_path_mknod), LSM_HOOK_INIT(path_rename, apparmor_path_rename), LSM_HOOK_INIT(path_chmod, apparmor_path_chmod), LSM_HOOK_INIT(path_chown, apparmor_path_chown), LSM_HOOK_INIT(path_truncate, apparmor_path_truncate), LSM_HOOK_INIT(inode_getattr, apparmor_inode_getattr), LSM_HOOK_INIT(file_open, apparmor_file_open), LSM_HOOK_INIT(file_receive, apparmor_file_receive), LSM_HOOK_INIT(file_permission, apparmor_file_permission), LSM_HOOK_INIT(file_alloc_security, apparmor_file_alloc_security), LSM_HOOK_INIT(file_free_security, apparmor_file_free_security), LSM_HOOK_INIT(mmap_file, apparmor_mmap_file), LSM_HOOK_INIT(file_mprotect, apparmor_file_mprotect), LSM_HOOK_INIT(file_lock, apparmor_file_lock), LSM_HOOK_INIT(file_truncate, apparmor_file_truncate), LSM_HOOK_INIT(getselfattr, apparmor_getselfattr), LSM_HOOK_INIT(setselfattr, apparmor_setselfattr), LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), LSM_HOOK_INIT(unix_stream_connect, apparmor_unix_stream_connect), LSM_HOOK_INIT(unix_may_send, apparmor_unix_may_send), LSM_HOOK_INIT(socket_create, apparmor_socket_create), LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), LSM_HOOK_INIT(socket_socketpair, apparmor_socket_socketpair), LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), LSM_HOOK_INIT(socket_accept, apparmor_socket_accept), LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg), LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg), LSM_HOOK_INIT(socket_getsockname, 
apparmor_socket_getsockname), LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername), LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), #ifdef CONFIG_NETWORK_SECMARK LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), #endif LSM_HOOK_INIT(socket_getpeersec_stream, apparmor_socket_getpeersec_stream), LSM_HOOK_INIT(socket_getpeersec_dgram, apparmor_socket_getpeersec_dgram), LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), #ifdef CONFIG_NETWORK_SECMARK LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request), #endif LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), LSM_HOOK_INIT(cred_transfer, apparmor_cred_transfer), LSM_HOOK_INIT(bprm_creds_for_exec, apparmor_bprm_creds_for_exec), LSM_HOOK_INIT(bprm_committing_creds, apparmor_bprm_committing_creds), LSM_HOOK_INIT(bprm_committed_creds, apparmor_bprm_committed_creds), LSM_HOOK_INIT(task_free, apparmor_task_free), LSM_HOOK_INIT(task_alloc, apparmor_task_alloc), LSM_HOOK_INIT(current_getlsmprop_subj, apparmor_current_getlsmprop_subj), LSM_HOOK_INIT(task_getlsmprop_obj, apparmor_task_getlsmprop_obj), LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), LSM_HOOK_INIT(task_kill, apparmor_task_kill), LSM_HOOK_INIT(userns_create, apparmor_userns_create), #ifdef CONFIG_AUDIT LSM_HOOK_INIT(audit_rule_init, aa_audit_rule_init), LSM_HOOK_INIT(audit_rule_known, aa_audit_rule_known), LSM_HOOK_INIT(audit_rule_match, aa_audit_rule_match), LSM_HOOK_INIT(audit_rule_free, aa_audit_rule_free), #endif LSM_HOOK_INIT(secid_to_secctx, apparmor_secid_to_secctx), LSM_HOOK_INIT(lsmprop_to_secctx, apparmor_lsmprop_to_secctx), LSM_HOOK_INIT(secctx_to_secid, apparmor_secctx_to_secid), LSM_HOOK_INIT(release_secctx, apparmor_release_secctx), #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, apparmor_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, apparmor_uring_sqpoll), #endif }; /* * AppArmor sysfs module parameters */ static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool static const struct kernel_param_ops param_ops_aabool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool }; static int param_set_aauint(const char *val, const struct kernel_param *kp); static int param_get_aauint(char *buffer, const struct kernel_param *kp); #define param_check_aauint param_check_uint static const struct kernel_param_ops param_ops_aauint = { .set = param_set_aauint, .get = param_get_aauint }; static int param_set_aacompressionlevel(const char *val, const struct kernel_param *kp); static int param_get_aacompressionlevel(char *buffer, const struct kernel_param *kp); #define param_check_aacompressionlevel param_check_int static const struct kernel_param_ops param_ops_aacompressionlevel = { .set = param_set_aacompressionlevel, .get = param_get_aacompressionlevel }; static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp); static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool static const struct kernel_param_ops param_ops_aalockpolicy = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy }; static 
int param_set_debug(const char *val, const struct kernel_param *kp); static int param_get_debug(char *buffer, const struct kernel_param *kp); static int param_set_audit(const char *val, const struct kernel_param *kp); static int param_get_audit(char *buffer, const struct kernel_param *kp); static int param_set_mode(const char *val, const struct kernel_param *kp); static int param_get_mode(char *buffer, const struct kernel_param *kp); /* Flag values, also controllable via /sys/module/apparmor/parameters * We define special types as we want to do additional mediation. */ /* AppArmor global enforcement switch - complain, enforce, kill */ enum profile_mode aa_g_profile_mode = APPARMOR_ENFORCE; module_param_call(mode, param_set_mode, param_get_mode, &aa_g_profile_mode, S_IRUSR | S_IWUSR); /* whether policy verification hashing is enabled */ bool aa_g_hash_policy = IS_ENABLED(CONFIG_SECURITY_APPARMOR_HASH_DEFAULT); #ifdef CONFIG_SECURITY_APPARMOR_HASH module_param_named(hash_policy, aa_g_hash_policy, aabool, S_IRUSR | S_IWUSR); #endif /* whether policy exactly as loaded is retained for debug and checkpointing */ bool aa_g_export_binary = IS_ENABLED(CONFIG_SECURITY_APPARMOR_EXPORT_BINARY); #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY module_param_named(export_binary, aa_g_export_binary, aabool, 0600); #endif /* policy loaddata compression level */ int aa_g_rawdata_compression_level = AA_DEFAULT_CLEVEL; module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); /* Debug mode */ int aa_g_debug; module_param_call(debug, param_set_debug, param_get_debug, &aa_g_debug, 0600); /* Audit mode */ enum audit_mode aa_g_audit; module_param_call(audit, param_set_audit, param_get_audit, &aa_g_audit, S_IRUSR | S_IWUSR); /* Determines if audit header is included in audited messages. This * provides more context if the audit daemon is not running */ bool aa_g_audit_header = true; module_param_named(audit_header, aa_g_audit_header, aabool, S_IRUSR | S_IWUSR); /* lock out loading/removal of policy * TODO: add in at boot loading of policy, which is the only way to * load policy, if lock_policy is set */ bool aa_g_lock_policy; module_param_named(lock_policy, aa_g_lock_policy, aalockpolicy, S_IRUSR | S_IWUSR); /* Syscall logging mode */ bool aa_g_logsyscall; module_param_named(logsyscall, aa_g_logsyscall, aabool, S_IRUSR | S_IWUSR); /* Maximum pathname length before accesses will start getting rejected */ unsigned int aa_g_path_max = 2 * PATH_MAX; module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR); /* Determines how paranoid loading of policy is and how much verification * on the loaded policy is done. * DEPRECATED: read only, as strict checking of load is always done now * that non-root users (user namespaces) can load policy.
*/ bool aa_g_paranoid_load = IS_ENABLED(CONFIG_SECURITY_APPARMOR_PARANOID_LOAD); module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); static int param_get_aaintbool(char *buffer, const struct kernel_param *kp); static int param_set_aaintbool(const char *val, const struct kernel_param *kp); #define param_check_aaintbool param_check_int static const struct kernel_param_ops param_ops_aaintbool = { .set = param_set_aaintbool, .get = param_get_aaintbool }; /* Boot time disable flag */ static int apparmor_enabled __ro_after_init = 1; module_param_named(enabled, apparmor_enabled, aaintbool, 0444); static int __init apparmor_enabled_setup(char *str) { unsigned long enabled; int error = kstrtoul(str, 0, &enabled); if (!error) apparmor_enabled = enabled ? 1 : 0; return 1; } __setup("apparmor=", apparmor_enabled_setup); /* set global flag turning off the ability to load policy */ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aabool(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } static int param_get_aabool(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aauint(const char *val, const struct kernel_param *kp) { int error; if (!apparmor_enabled) return -EINVAL; /* file is ro but enforce 2nd line check */ if (apparmor_initialized) return -EPERM; error = param_set_uint(val, kp); aa_g_path_max = max_t(uint32_t, aa_g_path_max, sizeof(union aa_buffer)); pr_info("AppArmor: buffer size set to %d bytes\n", aa_g_path_max); return error; } static int param_get_aauint(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_uint(buffer, kp); } /* Can only be set before AppArmor is initialized (i.e. on boot cmdline). */ static int param_set_aaintbool(const char *val, const struct kernel_param *kp) { struct kernel_param kp_local; bool value; int error; if (apparmor_initialized) return -EPERM; /* Create local copy, with arg pointing to bool type. */ value = !!*((int *)kp->arg); memcpy(&kp_local, kp, sizeof(kp_local)); kp_local.arg = &value; error = param_set_bool(val, &kp_local); if (!error) *((int *)kp->arg) = *((bool *)kp_local.arg); return error; } /* * To avoid changing /sys/module/apparmor/parameters/enabled from Y/N to * 1/0, this converts the "int that is actually bool" back to bool for * display in the /sys filesystem, while keeping it "int" for the LSM * infrastructure. */ static int param_get_aaintbool(char *buffer, const struct kernel_param *kp) { struct kernel_param kp_local; bool value; /* Create local copy, with arg pointing to bool type. 
*/ value = !!*((int *)kp->arg); memcpy(&kp_local, kp, sizeof(kp_local)); kp_local.arg = &value; return param_get_bool(buffer, &kp_local); } static int param_set_aacompressionlevel(const char *val, const struct kernel_param *kp) { int error; if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized) return -EPERM; error = param_set_int(val, kp); aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level, AA_MIN_CLEVEL, AA_MAX_CLEVEL); pr_info("AppArmor: policy rawdata compression level set to %d\n", aa_g_rawdata_compression_level); return error; } static int param_get_aacompressionlevel(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_int(buffer, kp); } static int param_get_debug(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return aa_print_debug_params(buffer); } static int param_set_debug(const char *val, const struct kernel_param *kp) { int i; if (!apparmor_enabled) return -EINVAL; if (!val) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = aa_parse_debug_params(val); if (i == DEBUG_PARSE_ERROR) return -EINVAL; aa_g_debug = i; return 0; } static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]); } static int param_set_audit(const char *val, const struct kernel_param *kp) { int i; if (!apparmor_enabled) return -EINVAL; if (!val) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(audit_mode_names, AUDIT_MAX_INDEX, val); if (i < 0) return -EINVAL; aa_g_audit = i; return 0; } static int param_get_mode(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]); } static int param_set_mode(const char *val, const struct kernel_param *kp) { int i; if (!apparmor_enabled) return -EINVAL; if (!val) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(aa_profile_mode_names, APPARMOR_MODE_NAMES_MAX_INDEX, val); if (i < 0) return -EINVAL; aa_g_profile_mode = i; return 0; } char *aa_get_buffer(bool in_atomic) { union aa_buffer *aa_buf; struct aa_local_cache *cache; bool try_again = true; gfp_t flags = (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); /* use per cpu cached buffers first */ cache = get_cpu_ptr(&aa_local_buffers); if (!list_empty(&cache->head)) { aa_buf = list_first_entry(&cache->head, union aa_buffer, list); list_del(&aa_buf->list); cache->hold--; cache->count--; put_cpu_ptr(&aa_local_buffers); return &aa_buf->buffer[0]; } put_cpu_ptr(&aa_local_buffers); if (!spin_trylock(&aa_buffers_lock)) { cache = get_cpu_ptr(&aa_local_buffers); cache->hold += 1; put_cpu_ptr(&aa_local_buffers); spin_lock(&aa_buffers_lock); } else { cache = get_cpu_ptr(&aa_local_buffers); put_cpu_ptr(&aa_local_buffers); } retry: if (buffer_count > reserve_count || (in_atomic && !list_empty(&aa_global_buffers))) { aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer, list); 
list_del(&aa_buf->list); buffer_count--; spin_unlock(&aa_buffers_lock); return aa_buf->buffer; } if (in_atomic) { /* * out of reserve buffers and in atomic context so increase * how many buffers to keep in reserve */ reserve_count++; flags = GFP_ATOMIC; } spin_unlock(&aa_buffers_lock); if (!in_atomic) might_sleep(); aa_buf = kmalloc(aa_g_path_max, flags); if (!aa_buf) { if (try_again) { try_again = false; spin_lock(&aa_buffers_lock); goto retry; } pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n"); return NULL; } return aa_buf->buffer; } void aa_put_buffer(char *buf) { union aa_buffer *aa_buf; struct aa_local_cache *cache; if (!buf) return; aa_buf = container_of(buf, union aa_buffer, buffer[0]); cache = get_cpu_ptr(&aa_local_buffers); if (!cache->hold) { put_cpu_ptr(&aa_local_buffers); if (spin_trylock(&aa_buffers_lock)) { /* put back on global list */ list_add(&aa_buf->list, &aa_global_buffers); buffer_count++; spin_unlock(&aa_buffers_lock); cache = get_cpu_ptr(&aa_local_buffers); put_cpu_ptr(&aa_local_buffers); return; } /* contention on global list, fallback to percpu */ cache = get_cpu_ptr(&aa_local_buffers); cache->hold += 1; } /* cache in percpu list */ list_add(&aa_buf->list, &cache->head); cache->count++; put_cpu_ptr(&aa_local_buffers); } /* * AppArmor init functions */ /** * set_init_ctx - set a task context and profile on the first task. * * TODO: allow setting an alternate profile other than unconfined */ static int __init set_init_ctx(void) { struct cred *cred = (__force struct cred *)current->real_cred; set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); return 0; } static void destroy_buffers(void) { union aa_buffer *aa_buf; spin_lock(&aa_buffers_lock); while (!list_empty(&aa_global_buffers)) { aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer, list); list_del(&aa_buf->list); spin_unlock(&aa_buffers_lock); kfree(aa_buf); spin_lock(&aa_buffers_lock); } spin_unlock(&aa_buffers_lock); } static int __init alloc_buffers(void) { union aa_buffer *aa_buf; int i, num; /* * per cpu set of cached allocated buffers used to help reduce * lock contention */ for_each_possible_cpu(i) { per_cpu(aa_local_buffers, i).hold = 0; per_cpu(aa_local_buffers, i).count = 0; INIT_LIST_HEAD(&per_cpu(aa_local_buffers, i).head); } /* * A function may require two buffers at once. Usually the buffers are * used for a short period of time and are shared. On a UP kernel two * buffers should be enough; with more CPUs it is possible that more * buffers will be used simultaneously. The preallocated pool may grow. * This preallocation also has the side-effect that AppArmor will be * disabled early at boot if aa_g_path_max is extremely high.
*/ if (num_online_cpus() > 1) num = 4 + RESERVE_COUNT; else num = 2 + RESERVE_COUNT; for (i = 0; i < num; i++) { aa_buf = kmalloc(aa_g_path_max, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!aa_buf) { destroy_buffers(); return -ENOMEM; } aa_put_buffer(aa_buf->buffer); } return 0; } #ifdef CONFIG_SYSCTL static int apparmor_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (!aa_current_policy_admin_capable(NULL)) return -EPERM; if (!apparmor_enabled) return -EINVAL; return proc_dointvec(table, write, buffer, lenp, ppos); } static const struct ctl_table apparmor_sysctl_table[] = { #ifdef CONFIG_USER_NS { .procname = "unprivileged_userns_apparmor_policy", .data = &unprivileged_userns_apparmor_policy, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, #endif /* CONFIG_USER_NS */ { .procname = "apparmor_display_secid_mode", .data = &apparmor_display_secid_mode, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, { .procname = "apparmor_restrict_unprivileged_unconfined", .data = &aa_unprivileged_unconfined_restricted, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, }; static int __init apparmor_init_sysctl(void) { return register_sysctl("kernel", apparmor_sysctl_table) ? 0 : -ENOMEM; } #else static inline int apparmor_init_sysctl(void) { return 0; } #endif /* CONFIG_SYSCTL */ #if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK) static unsigned int apparmor_ip_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct aa_sk_ctx *ctx; struct sock *sk; int error; if (!skb->secmark) return NF_ACCEPT; sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; ctx = aa_sock(sk); rcu_read_lock(); error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_SENDMSG, AA_MAY_SEND, skb->secmark, sk); rcu_read_unlock(); if (!error) return NF_ACCEPT; return NF_DROP_ERR(-ECONNREFUSED); } static const struct nf_hook_ops apparmor_nf_ops[] = { { .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_FIRST, }, #endif }; static int __net_init apparmor_nf_register(struct net *net) { return nf_register_net_hooks(net, apparmor_nf_ops, ARRAY_SIZE(apparmor_nf_ops)); } static void __net_exit apparmor_nf_unregister(struct net *net) { nf_unregister_net_hooks(net, apparmor_nf_ops, ARRAY_SIZE(apparmor_nf_ops)); } static struct pernet_operations apparmor_net_ops = { .init = apparmor_nf_register, .exit = apparmor_nf_unregister, }; static int __init apparmor_nf_ip_init(void) { int err; if (!apparmor_enabled) return 0; err = register_pernet_subsys(&apparmor_net_ops); if (err) panic("Apparmor: register_pernet_subsys: error %d\n", err); return 0; } #endif static char nulldfa_src[] __aligned(8) = { #include "nulldfa.in" }; static struct aa_dfa *nulldfa; static char stacksplitdfa_src[] __aligned(8) = { #include "stacksplitdfa.in" }; struct aa_dfa *stacksplitdfa; struct aa_policydb *nullpdb; static int __init aa_setup_dfa_engine(void) { int error = -ENOMEM; nullpdb = aa_alloc_pdb(GFP_KERNEL); if (!nullpdb) return -ENOMEM; nulldfa = aa_dfa_unpack(nulldfa_src, sizeof(nulldfa_src), TO_ACCEPT1_FLAG(YYTD_DATA32) | TO_ACCEPT2_FLAG(YYTD_DATA32)); if (IS_ERR(nulldfa)) { error = PTR_ERR(nulldfa); goto fail; } nullpdb->dfa = aa_get_dfa(nulldfa); nullpdb->perms = 
kcalloc(2, sizeof(struct aa_perms), GFP_KERNEL); if (!nullpdb->perms) goto fail; nullpdb->size = 2; stacksplitdfa = aa_dfa_unpack(stacksplitdfa_src, sizeof(stacksplitdfa_src), TO_ACCEPT1_FLAG(YYTD_DATA32) | TO_ACCEPT2_FLAG(YYTD_DATA32)); if (IS_ERR(stacksplitdfa)) { error = PTR_ERR(stacksplitdfa); goto fail; } return 0; fail: aa_put_pdb(nullpdb); aa_put_dfa(nulldfa); nullpdb = NULL; nulldfa = NULL; stacksplitdfa = NULL; return error; } static void __init aa_teardown_dfa_engine(void) { aa_put_dfa(stacksplitdfa); aa_put_dfa(nulldfa); aa_put_pdb(nullpdb); nullpdb = NULL; stacksplitdfa = NULL; nulldfa = NULL; } static int __init apparmor_init(void) { int error; error = aa_setup_dfa_engine(); if (error) { AA_ERROR("Unable to setup dfa engine\n"); goto alloc_out; } error = aa_alloc_root_ns(); if (error) { AA_ERROR("Unable to allocate default profile namespace\n"); goto alloc_out; } error = apparmor_init_sysctl(); if (error) { AA_ERROR("Unable to register sysctls\n"); goto alloc_out; } error = alloc_buffers(); if (error) { AA_ERROR("Unable to allocate work buffers\n"); goto alloc_out; } error = set_init_ctx(); if (error) { AA_ERROR("Failed to set context on init task\n"); aa_free_root_ns(); goto buffers_out; } security_add_hooks(apparmor_hooks, ARRAY_SIZE(apparmor_hooks), &apparmor_lsmid); /* Inform the audit system that secctx is used */ audit_cfg_lsm(&apparmor_lsmid, AUDIT_CFG_LSM_SECCTX_SUBJECT); /* Report that AppArmor successfully initialized */ apparmor_initialized = 1; if (aa_g_profile_mode == APPARMOR_COMPLAIN) aa_info_message("AppArmor initialized: complain mode enabled"); else if (aa_g_profile_mode == APPARMOR_KILL) aa_info_message("AppArmor initialized: kill mode enabled"); else aa_info_message("AppArmor initialized"); return error; buffers_out: destroy_buffers(); alloc_out: aa_destroy_aafs(); aa_teardown_dfa_engine(); apparmor_enabled = false; return error; } DEFINE_LSM(apparmor) = { .id = &apparmor_lsmid, .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, .blobs = &apparmor_blob_sizes, .init = apparmor_init, .initcall_fs = aa_create_aafs, #if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK) .initcall_device = apparmor_nf_ip_init, #endif #ifdef CONFIG_SECURITY_APPARMOR_HASH .initcall_late = init_profile_hash, #endif };
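/*
 * Editor's note - illustrative sketch, not part of the original file: the
 * "int that is actually bool" conversion used by param_set_aaintbool()
 * above, in miniature. A local struct kernel_param is pointed at a stack
 * bool so the stock param_set_bool() parser can be reused, then the result
 * is copied back into the int the LSM infrastructure expects. The names
 * example_flag and example_set_intbool are hypothetical.
 */
#if 0	/* sketch only */
static int example_flag = 1;

static int example_set_intbool(const char *val, const struct kernel_param *kp)
{
	struct kernel_param kp_local;
	bool value = *(int *)kp->arg;	/* seed from the current int value */
	int error;

	memcpy(&kp_local, kp, sizeof(kp_local));
	kp_local.arg = &value;		/* parse "Y"/"N"/"1"/"0" as a bool */
	error = param_set_bool(val, &kp_local);
	if (!error)
		*(int *)kp->arg = value;	/* store back as an int */
	return error;
}
#endif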
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/bad_inode.c * * Copyright (C) 1997, Stephen Tweedie * * Provide stub functions for unreadable inodes * * Fabian Frederick : August 2003 - All file operations assigned to EIO */ #include <linux/fs.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/time.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/fiemap.h> static int bad_file_open(struct inode *inode, struct file *filp) { return -EIO; } static const struct file_operations bad_file_ops = { .open = bad_file_open, }; static int bad_inode_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return -EIO; } static struct dentry *bad_inode_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return ERR_PTR(-EIO); } static int bad_inode_link (struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_unlink(struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { return -EIO; } static struct dentry *bad_inode_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { return ERR_PTR(-EIO); } static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { return -EIO; } static int bad_inode_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return -EIO; } static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, int buflen) { return -EIO; } static int bad_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return -EIO; } static int bad_inode_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { return -EIO; } static int bad_inode_setattr(struct mnt_idmap *idmap, struct dentry *direntry, struct iattr *attrs) { return -EIO; } static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { return -EIO; } static const char *bad_inode_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { return ERR_PTR(-EIO); } static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu) { return ERR_PTR(-EIO); } static int bad_inode_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64
start, u64 len) { return -EIO; } static int bad_inode_update_time(struct inode *inode, int flags) { return -EIO; } static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, struct file *file, unsigned int open_flag, umode_t create_mode) { return -EIO; } static int bad_inode_tmpfile(struct mnt_idmap *idmap, struct inode *inode, struct file *file, umode_t mode) { return -EIO; } static int bad_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { return -EIO; } static const struct inode_operations bad_inode_ops = { .create = bad_inode_create, .lookup = bad_inode_lookup, .link = bad_inode_link, .unlink = bad_inode_unlink, .symlink = bad_inode_symlink, .mkdir = bad_inode_mkdir, .rmdir = bad_inode_rmdir, .mknod = bad_inode_mknod, .rename = bad_inode_rename2, .readlink = bad_inode_readlink, .permission = bad_inode_permission, .getattr = bad_inode_getattr, .setattr = bad_inode_setattr, .listxattr = bad_inode_listxattr, .get_link = bad_inode_get_link, .get_inode_acl = bad_inode_get_acl, .fiemap = bad_inode_fiemap, .update_time = bad_inode_update_time, .atomic_open = bad_inode_atomic_open, .tmpfile = bad_inode_tmpfile, .set_acl = bad_inode_set_acl, }; /* * When a filesystem is unable to read an inode due to an I/O error in * its read_inode() function, it can call make_bad_inode() to return a * set of stubs which will return EIO errors as required. * * We only need to do limited initialisation: all other fields are * preinitialised to zero automatically. */ /** * make_bad_inode - mark an inode bad due to an I/O error * @inode: Inode to mark bad * * When an inode cannot be read due to a media or remote network * failure this function makes the inode "bad" and causes I/O operations * on it to fail from this point on. */ void make_bad_inode(struct inode *inode) { remove_inode_hash(inode); inode->i_mode = S_IFREG; simple_inode_init_ts(inode); inode->i_op = &bad_inode_ops; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &bad_file_ops; } EXPORT_SYMBOL(make_bad_inode); /* * This tests whether an inode has been flagged as bad. The test uses * &bad_inode_ops to cover the case of invalidated inodes as well as * those created by make_bad_inode() above. */ /** * is_bad_inode - is an inode errored * @inode: inode to test * * Returns true if the inode in question has been marked as bad. */ bool is_bad_inode(struct inode *inode) { return (inode->i_op == &bad_inode_ops); } EXPORT_SYMBOL(is_bad_inode); /** * iget_failed - Mark an under-construction inode as dead and release it * @inode: The inode to discard * * Mark an under-construction inode as dead and release it. */ void iget_failed(struct inode *inode) { make_bad_inode(inode); unlock_new_inode(inode); iput(inode); } EXPORT_SYMBOL(iget_failed);
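/*
 * A filesystem typically pairs the helpers above in its inode lookup path:
 * read the on-disk inode after iget_locked(), and hand any I/O failure to
 * iget_failed() so that later lookups see a bad inode instead of a
 * half-constructed one. A minimal sketch; myfs_iget() and
 * myfs_read_inode_from_disk() are hypothetical, not part of this file.
 */
static struct inode *myfs_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = iget_locked(sb, ino);		/* find or allocate in-core inode */
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;			/* already cached and initialized */

	err = myfs_read_inode_from_disk(inode);	/* hypothetical helper */
	if (err) {
		iget_failed(inode);		/* marks bad, unlocks and releases */
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}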
// SPDX-License-Identifier: GPL-2.0-only /* * Dynamic DMA mapping support. * * This implementation is a fallback for platforms that do not support * I/O TLBs (aka DMA address translation hardware). * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> * Copyright (C) 2000, 2003 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid * unnecessary i-cache flushing. * 04/07/.. ak Better overflow handling. Assorted fixes. * 05/09/10 linville Add support for syncing ranges, support syncing for * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. * 08/12/11 beckyb Add highmem support */ #define pr_fmt(fmt) "software IO TLB: " fmt #include <linux/cache.h> #include <linux/cc_platform.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/dma-direct.h> #include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/io.h> #include <linux/iommu-helper.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/pfn.h> #include <linux/rculist.h> #include <linux/scatterlist.h> #include <linux/set_memory.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/swiotlb.h> #include <linux/types.h> #ifdef CONFIG_DMA_RESTRICTED_POOL #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/of_reserved_mem.h> #include <linux/slab.h> #endif #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h> #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* * Minimum IO TLB size to bother booting with. Systems with mainly * 64bit capable cards will only lightly use the swiotlb. If we can't * allocate a contiguous 1MB, we're probably in trouble anyway. */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) /** * struct io_tlb_slot - IO TLB slot descriptor * @orig_addr: The original address corresponding to a mapped entry. * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. * @pad_slots: Number of preceding padding slots. Valid only in the first * allocated non-padding slot.
*/ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; unsigned short list; unsigned short pad_slots; }; static bool swiotlb_force_bounce; static bool swiotlb_force_disable; #ifdef CONFIG_SWIOTLB_DYNAMIC static void swiotlb_dyn_alloc(struct work_struct *work); static struct io_tlb_mem io_tlb_default_mem = { .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock), .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools), .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc, swiotlb_dyn_alloc), }; #else /* !CONFIG_SWIOTLB_DYNAMIC */ static struct io_tlb_mem io_tlb_default_mem; #endif /* CONFIG_SWIOTLB_DYNAMIC */ static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static unsigned long default_nareas; /** * struct io_tlb_area - IO TLB memory area descriptor * * This is a single area with a single lock. * * @used: The number of used IO TLB blocks. * @index: The slot index to start searching in this area for the next round. * @lock: The lock to protect the above data structures in the map and * unmap calls. */ struct io_tlb_area { unsigned long used; unsigned int index; spinlock_t lock; }; /* * Round up the number of slabs to the next power of 2. The last area is going * to be smaller than the rest if default_nslabs is not a power of two. * The number of slots in an area should be a multiple of IO_TLB_SEGSIZE, * otherwise a segment may span two or more areas. This conflicts with the * tracking of free contiguous slots: free slots are treated as contiguous no * matter whether they cross an area boundary. * * Return true if default_nslabs is rounded up. */ static bool round_up_default_nslabs(void) { if (!default_nareas) return false; if (default_nslabs < IO_TLB_SEGSIZE * default_nareas) default_nslabs = IO_TLB_SEGSIZE * default_nareas; else if (is_power_of_2(default_nslabs)) return false; default_nslabs = roundup_pow_of_two(default_nslabs); return true; } /** * swiotlb_adjust_nareas() - adjust the number of areas and slots * @nareas: Desired number of areas. Zero is treated as 1. * * Adjust the default number of areas in a memory pool. * The default size of the memory pool may also change to meet minimum area * size requirements. */ static void swiotlb_adjust_nareas(unsigned int nareas) { if (!nareas) nareas = 1; else if (!is_power_of_2(nareas)) nareas = roundup_pow_of_two(nareas); default_nareas = nareas; pr_info("area num %d.\n", nareas); if (round_up_default_nslabs()) pr_info("SWIOTLB bounce buffer size roundup to %luMB", (default_nslabs << IO_TLB_SHIFT) >> 20); } /** * limit_nareas() - get the maximum number of areas for a given memory pool size * @nareas: Desired number of areas. * @nslots: Total number of slots in the memory pool. * * Limit the number of areas to the maximum possible number of areas in * a memory pool of the given size. * * Return: Maximum possible number of areas.
*/ static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) { if (nslots < nareas * IO_TLB_SEGSIZE) return nslots / IO_TLB_SEGSIZE; return nareas; } static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { /* avoid tail segment of size < IO_TLB_SEGSIZE */ default_nslabs = ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); } if (*str == ',') ++str; if (isdigit(*str)) swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); if (*str == ',') ++str; if (!strcmp(str, "force")) swiotlb_force_bounce = true; else if (!strcmp(str, "noforce")) swiotlb_force_disable = true; return 0; } early_param("swiotlb", setup_io_tlb_npages); unsigned long swiotlb_size_or_default(void) { return default_nslabs << IO_TLB_SHIFT; } void __init swiotlb_adjust_size(unsigned long size) { /* * If swiotlb parameter has not been specified, give a chance to * architectures such as those supporting memory encryption to * adjust/expand SWIOTLB size for their use. */ if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) return; size = ALIGN(size, IO_TLB_SIZE); default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); if (round_up_default_nslabs()) size = default_nslabs << IO_TLB_SHIFT; pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); } void swiotlb_print_info(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; if (!mem->nslabs) { pr_warn("No low mem\n"); return; } pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, (mem->nslabs << IO_TLB_SHIFT) >> 20); } static inline unsigned long io_tlb_offset(unsigned long val) { return val & (IO_TLB_SEGSIZE - 1); } static inline unsigned long nr_slots(u64 val) { return DIV_ROUND_UP(val, IO_TLB_SIZE); } /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to * call SWIOTLB when the operations are possible. It needs to be called * before the SWIOTLB memory is used. */ void __init swiotlb_update_mem_attributes(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; if (!mem->nslabs || mem->late_alloc) return; bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); } static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, unsigned long nslabs, bool late_alloc, unsigned int nareas) { void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; mem->nslabs = nslabs; mem->start = start; mem->end = mem->start + bytes; mem->late_alloc = late_alloc; mem->nareas = nareas; mem->area_nslabs = nslabs / mem->nareas; for (i = 0; i < mem->nareas; i++) { spin_lock_init(&mem->areas[i].lock); mem->areas[i].index = 0; mem->areas[i].used = 0; } for (i = 0; i < mem->nslabs; i++) { mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); mem->vaddr = vaddr; return; } /** * add_mem_pool() - add a memory pool to the allocator * @mem: Software IO TLB allocator. * @pool: Memory pool to be added. 
*/ static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) { #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock(&mem->lock); list_add_rcu(&pool->node, &mem->pools); mem->nslabs += pool->nslabs; spin_unlock(&mem->lock); #else mem->nslabs = pool->nslabs; #endif } static void __init *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); void *tlb; /* * By default, allocate the bounce buffer memory from low memory, but * allow it to be placed anywhere for hypervisors with guest * memory encryption. */ if (flags & SWIOTLB_ANY) tlb = memblock_alloc(bytes, PAGE_SIZE); else tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) { pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", __func__, bytes); return NULL; } if (remap && remap(tlb, nslabs) < 0) { memblock_free(tlb, PAGE_ALIGN(bytes)); pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); return NULL; } return tlb; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs; unsigned int nareas; size_t alloc_size; void *tlb; if (!addressing_limit && !swiotlb_force_bounce) return; if (swiotlb_force_disable) return; io_tlb_default_mem.force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (flags & SWIOTLB_ANY) io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); else io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); nslabs = default_nslabs; nareas = limit_nareas(default_nareas, nslabs); while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { if (nslabs <= IO_TLB_MIN_SLABS) return; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); } if (default_nslabs != nslabs) { pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", default_nslabs, nslabs); default_nslabs = nslabs; } alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); if (!mem->slots) { pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", __func__, alloc_size, PAGE_SIZE); return; } mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), nareas), SMP_CACHE_BYTES); if (!mem->areas) { pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; } swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); } void __init swiotlb_init(bool addressing_limit, unsigned int flags) { swiotlb_init_remap(addressing_limit, flags, NULL); } /* * Systems with larger DMA zones (those that don't support ISA) can * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching.
*/ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned int nareas; unsigned char *vstart = NULL; unsigned int order, area_order; bool retried = false; int rc = 0; if (io_tlb_default_mem.nslabs) return 0; if (swiotlb_force_disable) return 0; io_tlb_default_mem.force_bounce = swiotlb_force_bounce; #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) io_tlb_default_mem.phys_limit = zone_dma_limit; else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit); else io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); retry: order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; order--; nslabs = SLABS_PER_PAGE << order; retried = true; } if (!vstart) return -ENOMEM; if (remap) rc = remap(vstart, nslabs); if (rc) { free_pages((unsigned long)vstart, order); nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); if (nslabs < IO_TLB_MIN_SLABS) return rc; retried = true; goto retry; } if (retried) { pr_warn("only able to allocate %ld MB\n", (PAGE_SIZE << order) >> 20); } nareas = limit_nareas(default_nareas, nslabs); area_order = get_order(array_size(sizeof(*mem->areas), nareas)); mem->areas = (struct io_tlb_area *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); if (!mem->areas) goto error_area; mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(array_size(sizeof(*mem->slots), nslabs))); if (!mem->slots) goto error_slots; set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, nareas); add_mem_pool(&io_tlb_default_mem, mem); swiotlb_print_info(); return 0; error_slots: free_pages((unsigned long)mem->areas, area_order); error_area: free_pages((unsigned long)vstart, order); return -ENOMEM; } void __init swiotlb_exit(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long tbl_vaddr; size_t tbl_size, slots_size; unsigned int area_order; if (swiotlb_force_bounce) return; if (!mem->nslabs) return; pr_info("tearing down default memory pool\n"); tbl_vaddr = (unsigned long)phys_to_virt(mem->start); tbl_size = PAGE_ALIGN(mem->end - mem->start); slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); if (mem->late_alloc) { area_order = get_order(array_size(sizeof(*mem->areas), mem->nareas)); free_pages((unsigned long)mem->areas, area_order); free_pages(tbl_vaddr, get_order(tbl_size)); free_pages((unsigned long)mem->slots, get_order(slots_size)); } else { memblock_free_late(__pa(mem->areas), array_size(sizeof(*mem->areas), mem->nareas)); memblock_free_late(mem->start, tbl_size); memblock_free_late(__pa(mem->slots), slots_size); } memset(mem, 0, sizeof(*mem)); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. 
If successful, make the allocated * pages decrypted so that they can be used for DMA. * * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) * if the allocated physical address was above @phys_limit. */ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; paddr = page_to_phys(page); if (paddr + bytes - 1 > phys_limit) { __free_pages(page, order); return ERR_PTR(-EAGAIN); } vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(page, order); return NULL; } /** * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer * @dev: Device for which a memory pool is allocated. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * @gfp: GFP flags for the allocation. * * Return: Allocated pages, or %NULL on allocation failure. */ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, u64 phys_limit, gfp_t gfp) { struct page *page; /* * Allocate from the atomic pools if memory is encrypted and * the allocation is atomic, because decrypting may block. */ if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { void *vaddr; if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) return NULL; return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, dma_coherent_ok); } gfp &= ~GFP_ZONEMASK; if (phys_limit <= zone_dma_limit) gfp |= __GFP_DMA; else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) gfp |= __GFP_DMA32; else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & __GFP_DMA)) gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; else return NULL; } return page; } /** * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer * @vaddr: Virtual address of the buffer. * @bytes: Size of the buffer. */ static void swiotlb_free_tlb(void *vaddr, size_t bytes) { if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(NULL, vaddr, bytes)) return; /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(virt_to_page(vaddr), get_order(bytes)); } /** * swiotlb_alloc_pool() - allocate a new IO TLB memory pool * @dev: Device for which a memory pool is allocated. * @minslabs: Minimum number of slabs. * @nslabs: Desired (maximum) number of slabs. * @nareas: Number of areas. * @phys_limit: Maximum DMA buffer physical address. * @gfp: GFP flags for the allocations. * * Allocate and initialize a new IO TLB memory pool. The actual number of * slabs may be reduced if allocation of @nslabs fails. If even * @minslabs cannot be allocated, this function fails. * * Return: New memory pool, or %NULL on allocation failure.
*/ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, unsigned long minslabs, unsigned long nslabs, unsigned int nareas, u64 phys_limit, gfp_t gfp) { struct io_tlb_pool *pool; unsigned int slot_order; struct page *tlb; size_t pool_size; size_t tlb_size; if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; nareas = limit_nareas(nareas, nslabs); } pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); pool = kzalloc(pool_size, gfp); if (!pool) goto error; pool->areas = (void *)pool + sizeof(*pool); tlb_size = nslabs << IO_TLB_SHIFT; while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { if (nslabs <= minslabs) goto error_tlb; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); tlb_size = nslabs << IO_TLB_SHIFT; } slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); pool->slots = (struct io_tlb_slot *) __get_free_pages(gfp, slot_order); if (!pool->slots) goto error_slots; swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); return pool; error_slots: swiotlb_free_tlb(page_address(tlb), tlb_size); error_tlb: kfree(pool); error: return NULL; } /** * swiotlb_dyn_alloc() - dynamic memory pool allocation worker * @work: Pointer to dyn_alloc in struct io_tlb_mem. */ static void swiotlb_dyn_alloc(struct work_struct *work) { struct io_tlb_mem *mem = container_of(work, struct io_tlb_mem, dyn_alloc); struct io_tlb_pool *pool; pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, default_nareas, mem->phys_limit, GFP_KERNEL); if (!pool) { pr_warn_ratelimited("Failed to allocate new pool"); return; } add_mem_pool(mem, pool); } /** * swiotlb_dyn_free() - RCU callback to free a memory pool * @rcu: RCU head in the corresponding struct io_tlb_pool. */ static void swiotlb_dyn_free(struct rcu_head *rcu) { struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); size_t tlb_size = pool->end - pool->start; free_pages((unsigned long)pool->slots, get_order(slots_size)); swiotlb_free_tlb(pool->vaddr, tlb_size); kfree(pool); } /** * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical * address, if any. This function is for use only when the dev is known to * be using swiotlb. Use swiotlb_find_pool() for the more general case * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } pool = NULL; out: rcu_read_unlock(); return pool; } /** * swiotlb_del_pool() - remove an IO TLB pool from a device * @dev: Owning device. * @pool: Memory pool to be removed. 
*/ static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) { unsigned long flags; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_del_rcu(&pool->node); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); call_rcu(&pool->rcu, swiotlb_dyn_free); } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_dev_init() - initialize swiotlb fields in &struct device * @dev: Device to be initialized. */ void swiotlb_dev_init(struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; #ifdef CONFIG_SWIOTLB_DYNAMIC INIT_LIST_HEAD(&dev->dma_io_tlb_pools); spin_lock_init(&dev->dma_io_tlb_lock); dev->dma_uses_io_tlb = false; #endif } /** * swiotlb_align_offset() - Get required offset into an IO TLB allocation. * @dev: Owning device. * @align_mask: Allocation alignment mask. * @addr: DMA address. * * Return the minimum offset from the start of an IO TLB allocation which is * required for a given buffer address and allocation alignment to keep the * device happy. * * First, the address bits covered by min_align_mask must be identical in the * original address and the bounce buffer address. High bits are preserved by * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra * padding bytes before the bounce buffer. * * Second, @align_mask specifies which bits of the first allocated slot must * be zero. This may require allocating additional padding slots, and then the * offset (in bytes) from the first such padding slot is returned. */ static unsigned int swiotlb_align_offset(struct device *dev, unsigned int align_mask, u64 addr) { return addr & dma_get_min_align_mask(dev) & (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *mem) { int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; /* * It's valid for tlb_offset to be negative. This can happen when the * "offset" returned by swiotlb_align_offset() is non-zero, and the * tlb_addr is pointing within the first "offset" bytes of the second * or subsequent slots of the allocated swiotlb area. While it's not * valid for tlb_addr to be pointing within the first "offset" bytes * of the first slot, there's no way to check for such an error since * this function can't distinguish the first slot from the second and * subsequent slots. */ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; if (size > alloc_size) { dev_WARN_ONCE(dev, 1, "Buffer overflow detected. Allocation size: %zu. 
Mapping size: %zu.\n", alloc_size, size); size = alloc_size; } if (PageHighMem(pfn_to_page(pfn))) { unsigned int offset = orig_addr & ~PAGE_MASK; struct page *page; unsigned int sz = 0; unsigned long flags; while (size) { sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) memcpy_from_page(vaddr, page, offset, sz); else memcpy_to_page(page, offset, vaddr, sz); local_irq_restore(flags); size -= sz; pfn++; vaddr += sz; offset = 0; } } else if (dir == DMA_TO_DEVICE) { memcpy(vaddr, phys_to_virt(orig_addr), size); } else { memcpy(phys_to_virt(orig_addr), vaddr, size); } } static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) { return start + (idx << IO_TLB_SHIFT); } /* * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. */ static inline unsigned long get_max_slots(unsigned long boundary_mask) { return (boundary_mask >> IO_TLB_SHIFT) + 1; } static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) { if (index >= mem->area_nslabs) return 0; return index; } /* * Track the total used slots with a global atomic value in order to have * correct information to determine the high water mark. The mem_used() * function gives imprecise results because there's no locking across * multiple areas. */ #ifdef CONFIG_DEBUG_FS static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { unsigned long old_hiwater, new_used; new_used = atomic_long_add_return(nslots, &mem->total_used); old_hiwater = atomic_long_read(&mem->used_hiwater); do { if (new_used <= old_hiwater) break; } while (!atomic_long_try_cmpxchg(&mem->used_hiwater, &old_hiwater, new_used)); } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->total_used); } #else /* !CONFIG_DEBUG_FS */ static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_SWIOTLB_DYNAMIC #ifdef CONFIG_DEBUG_FS static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_add(nslots, &mem->transient_nslabs); } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->transient_nslabs); } #else /* !CONFIG_DEBUG_FS */ static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_search_pool_area() - search one memory area in one pool * @dev: Device which maps the buffer. * @pool: Memory pool to be searched. * @area_index: Index of the IO TLB memory area to be searched. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * * Find a suitable sequence of IO TLB entries for the request and allocate * a buffer from the given IO TLB memory area. * This function takes care of locking. * * Return: Index of the first allocated slot, or -1 on error. 
*/ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { struct io_tlb_area *area = pool->areas + area_index; unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; unsigned int slot_index; BUG_ON(!nslots); BUG_ON(area_index >= pool->nareas); /* * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be * page-aligned in the absence of any other alignment requirements. * 'alloc_align_mask' was later introduced to specify the alignment * explicitly, however this is passed as zero for streaming mappings * and so we preserve the old behaviour there in case any drivers are * relying on it. */ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) alloc_align_mask = PAGE_SIZE - 1; /* * Ensure that the allocation is at least slot-aligned and update * 'iotlb_align_mask' to ignore bits that will be preserved when * offsetting into the allocation. */ alloc_align_mask |= (IO_TLB_SIZE - 1); iotlb_align_mask &= ~alloc_align_mask; /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. */ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; slot_base = area_index * pool->area_nslabs; index = area->index; for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { phys_addr_t tlb_addr; slot_index = slot_base + index; tlb_addr = slot_addr(tbl_dma_addr, slot_index); if ((tlb_addr & alloc_align_mask) || (orig_addr && (tlb_addr & iotlb_align_mask) != (orig_addr & iotlb_align_mask))) { index = wrap_area_index(pool, index + 1); slots_checked++; continue; } if (!iommu_is_span_boundary(slot_index, nslots, nr_slots(tbl_dma_addr), max_slots)) { if (pool->slots[slot_index].list >= nslots) goto found; } index = wrap_area_index(pool, index + stride); slots_checked += stride; } not_found: spin_unlock_irqrestore(&area->lock, flags); return -1; found: /* * If we find a slot that indicates we have 'nslots' number of * contiguous buffers, we allocate the buffers from that slot onwards * and set the list of free entries to '0' indicating unavailable. */ for (i = slot_index; i < slot_index + nslots; i++) { pool->slots[i].list = 0; pool->slots[i].alloc_size = alloc_size - (offset + ((i - slot_index) << IO_TLB_SHIFT)); } for (i = slot_index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && pool->slots[i].list; i--) pool->slots[i].list = ++count; /* * Update the indices to avoid searching in the next round. */ area->index = wrap_area_index(pool, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); return slot_index; } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_search_area() - search one memory area in all pools * @dev: Device which maps the buffer. * @start_cpu: Start CPU number. * @cpu_offset: Offset from @start_cpu. * @orig_addr: Original (non-bounced) IO buffer address. 
* @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search one memory area in all pools for a sequence of slots that match the * allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_search_area(struct device *dev, int start_cpu, int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; int area_index; int index = -1; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (cpu_offset >= pool->nareas) continue; area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); index = swiotlb_search_pool_area(dev, pool, area_index, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) { *retpool = pool; break; } } rcu_read_unlock(); return index; } /** * swiotlb_find_slots() - search for slots in the whole swiotlb * @dev: Device which maps the buffer. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search through the whole software IO TLB to find a sequence of slots that * match the allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; unsigned long nslabs; unsigned long flags; u64 phys_limit; int cpu, i; int index; if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) return -1; cpu = raw_smp_processor_id(); for (i = 0; i < default_nareas; ++i) { index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, alloc_align_mask, &pool); if (index >= 0) goto found; } if (!mem->can_grow) return -1; schedule_work(&mem->dyn_alloc); nslabs = nr_slots(alloc_size); phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, GFP_NOWAIT); if (!pool) return -1; index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, alloc_size, alloc_align_mask); if (index < 0) { swiotlb_dyn_free(&pool->rcu); return -1; } pool->transient = true; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); inc_transient_used(mem, pool->nslabs); found: WRITE_ONCE(dev->dma_uses_io_tlb, true); /* * The general barrier orders reads and writes against a presumed store * of the SWIOTLB buffer address by a device driver (to a driver private * data structure). It serves two purposes. * * First, the store to dev->dma_uses_io_tlb must be ordered before the * presumed store. This guarantees that the returned buffer address * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. * * Second, the load from mem->pools must be ordered before the same * presumed store. This guarantees that the returned buffer address * cannot be observed by another CPU before an update of the RCU list * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * * See also the comment in swiotlb_find_pool(). 
*/ smp_mb(); *retpool = pool; return index; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_pool *pool; int start, i; int index; *retpool = pool = &dev->dma_io_tlb_mem->defpool; i = start = raw_smp_processor_id() & (pool->nareas - 1); do { index = swiotlb_search_pool_area(dev, pool, i, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) return index; if (++i >= pool->nareas) i = 0; } while (i != start); return -1; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ #ifdef CONFIG_DEBUG_FS /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is accurate in this version of the function, because an atomic * counter is available if CONFIG_DEBUG_FS is set. * * Return: Number of used slots. */ static unsigned long mem_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->total_used); } #else /* !CONFIG_DEBUG_FS */ /** * mem_pool_used() - get number of used slots in a memory pool * @pool: Software IO TLB memory pool. * * The result is not accurate, see mem_used(). * * Return: Approximate number of used slots. */ static unsigned long mem_pool_used(struct io_tlb_pool *pool) { int i; unsigned long used = 0; for (i = 0; i < pool->nareas; i++) used += pool->areas[i].used; return used; } /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is not accurate, because there is no locking of individual * areas. * * Return: Approximate number of used slots. */ static unsigned long mem_used(struct io_tlb_mem *mem) { #ifdef CONFIG_SWIOTLB_DYNAMIC struct io_tlb_pool *pool; unsigned long used = 0; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) used += mem_pool_used(pool); rcu_read_unlock(); return used; #else return mem_pool_used(&mem->defpool); #endif } #endif /* CONFIG_DEBUG_FS */ /** * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area * @dev: Device which maps the buffer. * @orig_addr: Original (non-bounced) physical IO buffer address * @mapping_size: Requested size of the actual bounce buffer, excluding * any pre- or post-padding for alignment * @alloc_align_mask: Required start and end alignment of the allocated buffer * @dir: DMA direction * @attrs: Optional DMA attributes for the map operation * * Find and allocate a suitable sequence of IO TLB slots for the request. * The allocated space starts at an alignment specified by alloc_align_mask, * and the size of the allocated space is rounded up so that the total amount * of allocated space is a multiple of (alloc_align_mask + 1). If * alloc_align_mask is zero, the allocated space may be at any alignment and * the size is not rounded up. * * The returned address is within the allocated space and matches the bits * of orig_addr that are specified in the DMA min_align_mask for the device. As * such, this returned address may be offset from the beginning of the allocated * space. The bounce buffer space starting at the returned address for * mapping_size bytes is initialized to the contents of the original IO buffer * area. Any pre-padding (due to an offset) and any post-padding (due to * rounding-up the size) is not initialized. 
*/ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, unsigned int alloc_align_mask, enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset; struct io_tlb_pool *pool; unsigned int i; size_t size; int index; phys_addr_t tlb_addr; unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); return (phys_addr_t)DMA_MAPPING_ERROR; } if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); /* * The default swiotlb memory pool is allocated with PAGE_SIZE * alignment. If a mapping is requested with larger alignment, * the mapping may be unable to use the initial slot(s) in all * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request * of or near the maximum mapping size would always fail. */ dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); size = ALIGN(mapping_size + offset, alloc_align_mask + 1); index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", size, mem->nslabs, mem_used(mem)); return (phys_addr_t)DMA_MAPPING_ERROR; } /* * If dma_skip_sync was set, reset it on first SWIOTLB buffer * mapping to always sync SWIOTLB buffers. */ dma_reset_need_sync(dev); /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ pad_slots = offset >> IO_TLB_SHIFT; offset &= (IO_TLB_SIZE - 1); index += pad_slots; pool->slots[index].pad_slots = pad_slots; for (i = 0; i < (nr_slots(size) - pad_slots); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy * the original buffer to the TLB buffer before initiating DMA in order * to preserve the original's data if the device does a partial write, * i.e. if the device doesn't overwrite the entire buffer. Preserving * the original data, even if it's garbage, is necessary to match * hardware behavior. Use of swiotlb is supposed to be transparent, * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *mem) { unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; struct io_tlb_area *area; int count, i; index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; index -= mem->slots[index].pad_slots; nslots = nr_slots(mem->slots[index].alloc_size + offset); aindex = index / mem->area_nslabs; area = &mem->areas[aindex]; /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. 
*/ BUG_ON(aindex >= mem->nareas); spin_lock_irqsave(&area->lock, flags); if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) count = mem->slots[index + nslots].list; else count = 0; /* * Step 1: return the slots to the free list, merging the slots with * succeeding slots */ for (i = index + nslots - 1; i >= index; i--) { mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } /* * Step 2: merge the returned slots with the preceding slots, if * available (non-zero) */ for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) mem->slots[i].list = ++count; area->used -= nslots; spin_unlock_irqrestore(&area->lock, flags); dec_used(dev->dma_io_tlb_mem, nslots); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_del_transient() - delete a transient memory pool * @dev: Device which mapped the buffer. * @tlb_addr: Physical address within a bounce buffer. * @pool: Pointer to the transient memory pool to be checked and deleted. * * Check whether the address belongs to a transient SWIOTLB memory pool. * If yes, then delete the pool. * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *pool) { if (!pool->transient) return false; dec_used(dev->dma_io_tlb_mem, pool->nslabs); swiotlb_del_pool(dev, pool); dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); return true; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /* * tlb_addr is the physical address of the bounce buffer to unmap. */ void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, size_t mapping_size, enum dma_data_direction dir, unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE, pool); if (swiotlb_del_transient(dev, tlb_addr, pool)) return; swiotlb_release_slots(dev, tlb_addr, pool); } void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } /* * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing * to the device, copy the data into it as well.
*/ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t swiotlb_addr; dma_addr_t dma_addr; trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC, swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) arch_sync_dma_for_device(swiotlb_addr, size, dir); return dma_addr; } size_t swiotlb_max_mapping_size(struct device *dev) { int min_align_mask = dma_get_min_align_mask(dev); int min_align = 0; /* * swiotlb_find_slots() skips slots according to * the min align mask. This affects max mapping size. * Take it into account here. */ if (min_align_mask) min_align = roundup(min_align_mask, IO_TLB_SIZE); return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; } /** * is_swiotlb_allocated() - check if the default software IO TLB is initialized */ bool is_swiotlb_allocated(void) { return io_tlb_default_mem.nslabs; } bool is_swiotlb_active(struct device *dev) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; return mem && mem->nslabs; } /** * default_swiotlb_base() - get the base address of the default SWIOTLB * * Get the lowest physical address used by the default software IO TLB pool. */ phys_addr_t default_swiotlb_base(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC io_tlb_default_mem.can_grow = false; #endif return io_tlb_default_mem.defpool.start; } /** * default_swiotlb_limit() - get the address limit of the default SWIOTLB * * Get the highest physical address used by the default software IO TLB pool.
*/ phys_addr_t default_swiotlb_limit(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC return io_tlb_default_mem.phys_limit; #else return io_tlb_default_mem.defpool.end - 1; #endif } #ifdef CONFIG_DEBUG_FS #ifdef CONFIG_SWIOTLB_DYNAMIC static unsigned long mem_transient_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->transient_nslabs); } static int io_tlb_transient_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_transient_used(mem); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, NULL, "%llu\n"); #endif /* CONFIG_SWIOTLB_DYNAMIC */ static int io_tlb_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_used(mem); return 0; } static int io_tlb_hiwater_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = atomic_long_read(&mem->used_hiwater); return 0; } static int io_tlb_hiwater_set(void *data, u64 val) { struct io_tlb_mem *mem = data; /* Only allow setting to zero */ if (val != 0) return -EINVAL; atomic_long_set(&mem->used_hiwater, val); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, io_tlb_hiwater_set, "%llu\n"); static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, &fops_io_tlb_used); debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); #ifdef CONFIG_SWIOTLB_DYNAMIC debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, mem, &fops_io_tlb_transient_used); #endif } static int __init swiotlb_create_default_debugfs(void) { swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); return 0; } late_initcall(swiotlb_create_default_debugfs); #else /* !CONFIG_DEBUG_FS */ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_DMA_RESTRICTED_POOL struct page *swiotlb_alloc(struct device *dev, size_t size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; phys_addr_t tlb_addr; unsigned int align; int index; if (!mem) return NULL; align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; index = swiotlb_find_slots(dev, 0, size, align, &pool); if (index == -1) return NULL; tlb_addr = slot_addr(pool->start, index); if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); swiotlb_release_slots(dev, tlb_addr, pool); return NULL; } return pfn_to_page(PFN_DOWN(tlb_addr)); } bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); struct io_tlb_pool *pool; pool = swiotlb_find_pool(dev, tlb_addr); if (!pool) return false; swiotlb_release_slots(dev, tlb_addr, pool); return true; } static int rmem_swiotlb_device_init(struct reserved_mem *rmem, struct device *dev) { struct io_tlb_mem *mem = rmem->priv; unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; /* Set Per-device io tlb area to one */ unsigned int nareas = 1; if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping."); return -EINVAL; } /* * Since multiple devices can share the same pool, the private data, * 
io_tlb_mem struct, will be initialized by the first device attached * to it. */ if (!mem) { struct io_tlb_pool *pool; mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; pool = &mem->defpool; pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL); if (!pool->slots) { kfree(mem); return -ENOMEM; } pool->areas = kcalloc(nareas, sizeof(*pool->areas), GFP_KERNEL); if (!pool->areas) { kfree(pool->slots); kfree(mem); return -ENOMEM; } set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), rmem->size >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs, false, nareas); mem->force_bounce = true; mem->for_alloc = true; #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock_init(&mem->lock); INIT_LIST_HEAD_RCU(&mem->pools); #endif add_mem_pool(mem, pool); rmem->priv = mem; swiotlb_create_debugfs_files(mem, rmem->name); } dev->dma_io_tlb_mem = mem; return 0; } static void rmem_swiotlb_device_release(struct reserved_mem *rmem, struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; } static const struct reserved_mem_ops rmem_swiotlb_ops = { .device_init = rmem_swiotlb_device_init, .device_release = rmem_swiotlb_device_release, }; static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) { unsigned long node = rmem->fdt_node; if (of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "linux,cma-default", NULL) || of_get_flat_dt_prop(node, "linux,dma-default", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; rmem->ops = &rmem_swiotlb_ops; pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); return 0; } RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); #endif /* CONFIG_DMA_RESTRICTED_POOL */
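/*
 * Illustration (not part of the file above): a minimal device-tree fragment
 * that the "restricted-dma-pool" handler would match. Node names, addresses
 * and sizes are invented for the example; note that "reusable" and "no-map"
 * must be absent, as enforced by rmem_swiotlb_setup() above.
 *
 *	reserved-memory {
 *		#address-cells = <2>;
 *		#size-cells = <2>;
 *		ranges;
 *
 *		restricted_dma: restricted-dma-pool@50000000 {
 *			compatible = "restricted-dma-pool";
 *			reg = <0x0 0x50000000 0x0 0x400000>;
 *		};
 *	};
 *
 * A device then opts in to bouncing through this pool by referencing the
 * region from its own node:
 *
 *	memory-region = <&restricted_dma>;
 */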
// SPDX-License-Identifier: GPL-2.0 /* dvb-usb-firmware.c is part of the DVB USB library. * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * see dvb-usb-init.c for copyright information. * * This file contains functions for downloading the firmware to Cypress FX 1 and 2 based devices. * * FIXME: This part does not actually belong to dvb-usb, but to the usb-subsystem. */ #include "dvb-usb-common.h" #include <linux/usb.h> struct usb_cypress_controller { int id; const char *name; /* name of the usb controller */ u16 cpu_cs_register; /* needs to be restarted, when the firmware has been downloaded. */ }; static struct usb_cypress_controller cypress[] = { { .id = DEVICE_SPECIFIC, .name = "Device specific", .cpu_cs_register = 0 }, { .id = CYPRESS_AN2135, .name = "Cypress AN2135", .cpu_cs_register = 0x7f92 }, { .id = CYPRESS_AN2235, .name = "Cypress AN2235", .cpu_cs_register = 0x7f92 }, { .id = CYPRESS_FX2, .name = "Cypress FX2", .cpu_cs_register = 0xe600 }, }; /* * load a firmware packet to the device */ static int usb_cypress_writemem(struct usb_device *udev, u16 addr, u8 *data, u8 len) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0xa0, USB_TYPE_VENDOR, addr, 0x00, data, len, 5000); } int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type) { struct hexline *hx; u8 *buf; int ret, pos = 0; u16 cpu_cs_register = cypress[type].cpu_cs_register; buf = kmalloc(sizeof(*hx), GFP_KERNEL); if (!buf) return -ENOMEM; hx = (struct hexline *)buf; /* stop the CPU */ buf[0] = 1; if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) err("could not stop the USB controller CPU."); while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) { deb_fw("writing to address 0x%04x (buffer: 0x%02x %02x)\n", hx->addr, hx->len, hx->chk); ret = usb_cypress_writemem(udev, hx->addr, hx->data, hx->len); if (ret != hx->len) { err("error while transferring firmware (transferred size: %d, block size: %d)", ret, hx->len); ret = -EINVAL; break; } } if (ret < 0) { err("firmware download failed at %d with %d", pos, ret); kfree(buf); return ret; } if (ret == 0) { /* restart the CPU */ buf[0] = 0; if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) { err("could not restart the USB controller CPU."); ret = -EINVAL; } } else ret = -EIO; kfree(buf); return ret; } EXPORT_SYMBOL(usb_cypress_load_firmware); int dvb_usb_download_firmware(struct usb_device *udev, const struct dvb_usb_device_properties *props) { int ret; const struct firmware *fw = NULL; if ((ret = request_firmware(&fw, props->firmware, &udev->dev)) != 0) { err("did not find the firmware file '%s' (status %d).
You can use <kernel_dir>/scripts/get_dvb_firmware to get the firmware", props->firmware, ret); return ret; } info("downloading firmware from file '%s'", props->firmware); switch (props->usb_ctrl) { case CYPRESS_AN2135: case CYPRESS_AN2235: case CYPRESS_FX2: ret = usb_cypress_load_firmware(udev, fw, props->usb_ctrl); break; case DEVICE_SPECIFIC: if (props->download_firmware) ret = props->download_firmware(udev, fw); else { err("BUG: driver didn't specify a download_firmware callback, although it claims to have a DEVICE_SPECIFIC one."); ret = -EINVAL; } break; default: ret = -EINVAL; break; } release_firmware(fw); return ret; } int dvb_usb_get_hexline(const struct firmware *fw, struct hexline *hx, int *pos) { u8 *b = (u8 *) &fw->data[*pos]; int data_offs = 4; if (*pos >= fw->size) return 0; memset(hx, 0, sizeof(struct hexline)); hx->len = b[0]; if ((*pos + hx->len + 4) >= fw->size) return -EINVAL; hx->addr = b[1] | (b[2] << 8); hx->type = b[3]; if (hx->type == 0x04) { /* b[4] and b[5] are the Extended linear address record data field */ hx->addr |= (b[4] << 24) | (b[5] << 16); /* hx->len -= 2; data_offs += 2; */ } memcpy(hx->data, &b[data_offs], hx->len); hx->chk = b[hx->len + data_offs]; *pos += hx->len + 5; return *pos; } EXPORT_SYMBOL(dvb_usb_get_hexline);
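/*
 * For reference, the firmware record layout that dvb_usb_get_hexline()
 * parses (one record per call, derived directly from the code above):
 *
 *	offset 0            data length (hx->len)
 *	offsets 1-2         load address, little endian (hx->addr)
 *	offset 3            record type (0x04 = extended linear address,
 *	                    which folds b[4]/b[5] into the upper address bits)
 *	offsets 4..4+len-1  payload (hx->data)
 *	offset 4+len        checksum byte (hx->chk)
 *
 * so *pos advances by hx->len + 5 for every record consumed.
 */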
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.)
*/ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) #define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) _L_FLAG((tty), ECHOE) #define L_ECHOK(tty) 
_L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; struct tty_operations; /** * struct tty_struct - state associated with a tty while open * * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero * frees the structure * @dev: class device or %NULL (e.g. ptys, serdev) * @driver: &struct tty_driver operating this tty * @ops: &struct tty_operations of @driver for this tty (open, close, etc.) * @index: index of this tty (e.g. to construct @name like tty12) * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty * @ldisc: the current line discipline for this tty (n_tty by default) * @atomic_write_lock: protects against concurrent writers, i.e. locks * @write_cnt, @write_buf and similar * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex), * protecting several operations on this tty * @throttle_mutex: protects against concurrent tty_throttle_safe() and * tty_unthrottle_safe() (but not tty_unthrottle()) * @termios_rwsem: protects @termios and @termios_locked * @winsize_mutex: protects @winsize * @termios: termios for the current tty, copied from/to @driver.termios * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS * ioctls) * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3) * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ... * @count: count of open processes, reaching zero cancels all the work for * this tty and drops a @kref too (but does not free this tty) * @winsize: size of the terminal "window" (cf. @winsize_mutex) * @flow: flow settings grouped together * @flow.lock: lock for @flow members * @flow.stopped: tty stopped/started by stop_tty()/start_tty() * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has * precedence over @flow.stopped) * @ctrl: control settings grouped together * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both * @ctrl.lock and @legacy_mutex, readers must use at least one of * them. * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS * handling * @receive_room: bytes permitted to feed to @ldisc without any being lost * @flow_change: controls behavior of throttling, see tty_throttle_safe() and * tty_unthrottle_safe() * @link: link to another pty (master -> slave and vice versa) * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper() * @write_wait: concurrent writers are waiting in this queue until they are * allowed to write * @read_wait: readers wait for data in this queue * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while * freeing the tty, (re)used to release_one_tty() * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data) * @driver_data: pointer to @driver's private data (e.g. 
&struct uart_state) * @files_lock: protects @tty_files list * @tty_files: list of (re)openers of this tty (i.e. linked &struct * tty_file_private) * @closing: when set during close, n_tty processes only START & STOP chars * @write_buf: temporary buffer used during tty_write() to copy user data to * @write_cnt: count of bytes written in tty_write() to @write_buf * @SAK_work: if the tty has a pending do_SAK, it is queued here * @port: persistent storage for this device (i.e. &struct tty_port) * * All of the state associated with a tty while the tty is open. Persistent * storage for tty devices is referenced here as @port and is documented in * &struct tty_port. */ struct tty_struct { struct kref kref; int index; struct device *dev; struct tty_driver *driver; struct tty_port *port; const struct tty_operations *ops; struct tty_ldisc *ldisc; struct ld_semaphore ldisc_sem; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; struct ktermios termios, termios_locked; char name[64]; unsigned long flags; int count; unsigned int receive_room; struct winsize winsize; struct { spinlock_t lock; bool stopped; bool tco_stopped; } flow; struct { struct pid *pgrp; struct pid *session; spinlock_t lock; unsigned char pktstatus; bool packet; } ctrl; bool hw_stopped; bool closing; int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; int write_cnt; u8 *write_buf; struct list_head tty_files; struct work_struct SAK_work; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct file *file; struct list_head list; }; /** * enum tty_struct_flags - TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * * @TTY_THROTTLED: * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). * * @TTY_IO_ERROR: * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) * * @TTY_OTHER_CLOSED: * Device is a pty and the other side has closed. * * @TTY_EXCLUSIVE: * Exclusive open mode (a single opener). * * @TTY_DO_WRITE_WAKEUP: * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * * @TTY_LDISC_OPEN: * Indicates that a line discipline is open. For debugging purposes only. * * @TTY_PTY_LOCK: * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * * @TTY_NO_WRITE_SPLIT: * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * * @TTY_HUPPED: * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * * @TTY_HUPPING: * The TTY is in the process of hanging up to abort potential readers. * * @TTY_LDISC_CHANGING: * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. 
* * @TTY_LDISC_HALTED: * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. */ enum tty_struct_flags { TTY_THROTTLED, TTY_IO_ERROR, TTY_OTHER_CLOSED, TTY_EXCLUSIVE, TTY_DO_WRITE_WAKEUP, TTY_LDISC_OPEN, TTY_PTY_LOCK, TTY_NO_WRITE_SPLIT, TTY_HUPPED, TTY_HUPPING, TTY_LDISC_CHANGING, TTY_LDISC_HALTED, }; static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY void tty_kref_put(struct tty_struct *tty); struct pid *tty_get_pgrp(struct tty_struct *tty); void tty_vhangup_self(void); void disassociate_ctty(int priv); dev_t tty_devnum(struct tty_struct *tty); void proc_clear_tty(struct task_struct *p); struct tty_struct *get_current_tty(void); /* tty_io.c */ int __init tty_init(void); const char *tty_name(const struct tty_struct *tty); struct tty_struct *tty_kopen_exclusive(dev_t device); struct tty_struct *tty_kopen_shared(dev_t device); void tty_kclose(struct tty_struct *tty); int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen_exclusive(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; int vcs_init(void); extern const struct class tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Returns: a new reference to a tty object * * Locking: The caller must hold sufficient locks/counts to ensure that their * existing reference cannot go away. 
*/ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } const char *tty_driver_name(const struct tty_struct *tty); void tty_wait_until_sent(struct tty_struct *tty, long timeout); void stop_tty(struct tty_struct *tty); void start_tty(struct tty_struct *tty); void tty_write_message(struct tty_struct *tty, char *msg); int tty_send_xchar(struct tty_struct *tty, u8 ch); int tty_put_char(struct tty_struct *tty, u8 c); unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); bool tty_throttle_safe(struct tty_struct *tty); bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); int tty_get_tiocm(struct tty_struct *tty); int is_current_pgrp_orphaned(void); void tty_hangup(struct tty_struct *tty); void tty_vhangup(struct tty_struct *tty); int tty_hung_up_p(struct file *filp); void do_SAK(struct tty_struct *tty); void __do_SAK(struct tty_struct *tty); void no_tty(void); speed_t tty_termios_baud_rate(const struct ktermios *termios); void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns: the baud rate as an integer for this terminal * * Locking: The termios lock must be held by the caller. */ static inline speed_t tty_get_baud_rate(const struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); void tty_wakeup(struct tty_struct *tty); int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); int tty_perform_flush(struct tty_struct *tty, unsigned long arg); struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); void tty_release_struct(struct tty_struct *tty, int idx); void tty_init_termios(struct tty_struct *tty); void tty_save_termios(struct tty_struct *tty); int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; /* n_tty.c */ void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_exit(void); void tty_audit_fork(struct signal_struct *sig); int tty_audit_push(void); #else static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* vt.c */ int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ void tty_lock(struct tty_struct *tty); int tty_lock_interruptible(struct tty_struct *tty); void tty_unlock(struct 
tty_struct *tty); void tty_lock_slave(struct tty_struct *tty); void tty_unlock_slave(struct tty_struct *tty); void tty_set_lock_subclass(struct tty_struct *tty); #endif
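/*
 * Minimal usage sketch for the reference counting declared above
 * (illustrative only; the my_ctx/my_work_fn names are invented):
 *
 *	struct my_ctx {
 *		struct tty_struct *tty;
 *		struct work_struct work;
 *	};
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		struct my_ctx *ctx = container_of(work, struct my_ctx, work);
 *		struct tty_struct *tty = tty_kref_get(ctx->tty);
 *
 *		if (!tty)
 *			return;
 *		... use tty here; the reference pins it ...
 *		tty_kref_put(tty);
 *	}
 *
 * tty_kref_get() tolerates a NULL tty and returns NULL, so the check
 * above covers both the NULL and the acquired case.
 */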
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* I2C message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM #define TRACE_SYSTEM i2c #if !defined(_TRACE_I2C_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_I2C_H #include <linux/i2c.h> #include <linux/tracepoint.h> /* * drivers/i2c/i2c-core-base.c */ extern int i2c_transfer_trace_reg(void); extern void i2c_transfer_trace_unreg(void); /* * __i2c_transfer() write request */ TRACE_EVENT_FN(i2c_write, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read request */ TRACE_EVENT_FN(i2c_read, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read reply */ TRACE_EVENT_FN(i2c_reply, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() result */ TRACE_EVENT_FN(i2c_result, TP_PROTO(const struct i2c_adapter *adap, int num, int ret), TP_ARGS(adap, num, ret), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, nr_msgs ) __field(__s16, ret ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->nr_msgs = num; __entry->ret = ret; ), TP_printk("i2c-%d n=%u ret=%d", __entry->adapter_nr,
__entry->nr_msgs, __entry->ret ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); #endif /* _TRACE_I2C_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
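/*
 * How these events pair up in practice (see __i2c_transfer() in
 * drivers/i2c/i2c-core-base.c): i2c_write or i2c_read fires per message
 * before the adapter transfer, i2c_reply fires for each read message that
 * completed, and i2c_result closes the transaction. Example trace lines,
 * constructed to match the TP_printk formats above (values invented):
 *
 *	i2c_write:  i2c-4 #0 a=044 f=0000 l=2 [02-14]
 *	i2c_read:   i2c-4 #1 a=044 f=0001 l=4
 *	i2c_reply:  i2c-4 #1 a=044 f=0001 l=4 [33-00-00-00]
 *	i2c_result: i2c-4 n=2 ret=2
 */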
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID quirks support for Linux * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2007 Paul Walmsley */ /* */ #include <linux/hid.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/input/elan-i2c-ids.h> #include "hid-ids.h" /* * Alphabetically sorted by vendor then product.
*/ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016), HID_QUIRK_FULLSPEED_INTERVAL }, { HID_USB_DEVICE(USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS1758), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS682), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS692), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FIGHTERSTICK), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_COOLER_MASTER, USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, 
USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70R), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K95RGB), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_M65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_GLAIVE_RGB), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PRO_WIRELESS_KM5221W), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_2NES2SNES), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_4NES4SNES), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_REDRAGON_SEYMUR2), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE3), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER), HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH_2968), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), 
HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A293), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A), HID_QUIRK_ALWAYS_POLL }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0941), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_0641), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_1f4a), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M506), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406W), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_340), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_M508WX), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_M508X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_T609A), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_LEGION_GO_DUAL_DINPUT), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_LEGION_GO2_DUAL_DINPUT), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, 
USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6093), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MULTIPLE_1781, USB_DEVICE_ID_RAPHNET_4NES4SNES_OLD), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_NATSU, USB_DEVICE_ID_NATSU_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_NEC, USB_DEVICE_ID_NEC_USB_GAME_PAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_NEXIO, USB_DEVICE_ID_NEXIO_MULTITOUCH_PTI0750), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_NEXTWINDOW, USB_DEVICE_ID_NEXTWINDOW_TOUCHSCREEN), HID_QUIRK_MULTI_INPUT}, { HID_USB_DEVICE(USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_MOUSE), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK), HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_1610), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_1640), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_PI_ENGINEERING, USB_DEVICE_ID_PI_ENGINEERING_VEC_USB_FOOTPEDAL), HID_QUIRK_HIDINPUT_FORCE }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), 
HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_REALTEK, USB_DEVICE_ID_REALTEK_READER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPAD), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPORT), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RUMBLEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_2), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_PRO), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X65), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SIGMATEL, USB_DEVICE_ID_SIGMATEL_STMP3780), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS1030_TOUCH), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS817_TOUCH), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS_TS), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_HD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS1), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_QUAD_HD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_TP_V103), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K12A), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K15A), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPMAX, 
USB_DEVICE_ID_TOPMAX_COBRAPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8882), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8883), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_VRS, USB_DEVICE_ID_VRS_R295), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD2, USB_DEVICE_ID_SMARTJOY_DUAL_PLUS), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_GROUP_AUDIO), HID_QUIRK_NOGET }, { 0 } }; /* * A list of devices for which there is a specialized driver on HID bus. * * Please note that for multitouch devices (driven by hid-multitouch driver), * there is a proper autodetection and autoloading in place (based on presence * of HID_DG_CONTACTID), so those devices don't need to be added to this list, * as we are doing the right thing in hid_scan_usage(). * * Autodetection for (USB) HID sensor hubs exists too. If a collection of type * physical is found inside a usage page of type sensor, hid-sensor-hub will be * used as a driver. See hid_scan_report(). 
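 *
 * (Devices missing from hid_quirks[] above can also have quirks supplied at
 * load time through the quirks= module parameter, which takes
 * vendorID:productID:quirks triples in 0x-prefixed hex, e.g.
 * quirks=0x16c0:0x27d9:0x0004 - an illustrative triple, not a real entry.)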
/*
 * A list of devices for which there is a specialized driver on HID bus.
 *
 * Please note that for multitouch devices (driven by hid-multitouch driver),
 * there is a proper autodetection and autoloading in place (based on presence
 * of HID_DG_CONTACTID), so those devices don't need to be added to this list,
 * as we are doing the right thing in hid_scan_usage().
 *
 * Autodetection for (USB) HID sensor hubs exists too. If a collection of type
 * physical is found inside a usage page of type sensor, hid-sensor-hub will be
 * used as a driver. See hid_scan_report().
 */
static const struct hid_device_id hid_have_special_driver[] = {
#if IS_ENABLED(CONFIG_HID_A4TECH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_NB_95) },
#endif
#if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
#endif
#if IS_ENABLED(CONFIG_HID_ACRUX)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705) },
#endif
#if IS_ENABLED(CONFIG_HID_ALPS)
	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
#endif
#if IS_ENABLED(CONFIG_HID_APPLE)
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680_ALT) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2015) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) },
#endif
#if IS_ENABLED(CONFIG_HID_APPLEIR)
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
#endif
#if IS_ENABLED(CONFIG_HID_APPLETB_BL)
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
#endif
#if IS_ENABLED(CONFIG_HID_APPLETB_KBD)
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
#endif
#if IS_ENABLED(CONFIG_HID_ASUS)
	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_ASUS_MD_5112) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_ASUS_MD_5110) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD) },
#endif
#if IS_ENABLED(CONFIG_HID_AUREAL)
	{ HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
#endif
#if IS_ENABLED(CONFIG_HID_BELKIN)
	{ HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
#endif
#if IS_ENABLED(CONFIG_HID_BETOP_FF)
	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) },
#endif
#if IS_ENABLED(CONFIG_HID_CHERRY)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) },
#endif
#if IS_ENABLED(CONFIG_HID_CHICONY)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_ASUS_AK1D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
#endif
#if IS_ENABLED(CONFIG_HID_CMEDIA)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
#endif
#if IS_ENABLED(CONFIG_HID_CORSAIR)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_GLAIVE_RGB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
#endif
#if IS_ENABLED(CONFIG_HID_CP2112)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
#endif
#if IS_ENABLED(CONFIG_HID_CYPRESS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
#endif
#if IS_ENABLED(CONFIG_HID_DRAGONRISE)
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011) },
#endif
#if IS_ENABLED(CONFIG_HID_ELAN)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2_10_COVER) },
#endif
#if IS_ENABLED(CONFIG_HID_ELECOM)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XGL20DLBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3URBK_00FB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3URBK_018F) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT2DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
#endif
#if IS_ENABLED(CONFIG_HID_ELO)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) },
#endif
#if IS_ENABLED(CONFIG_HID_EMS_FF)
	{ HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) },
#endif
#if IS_ENABLED(CONFIG_HID_EZKEY)
	{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
#endif
#if IS_ENABLED(CONFIG_HID_GEMBIRD)
	{ HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) },
#endif
#if IS_ENABLED(CONFIG_HID_GFRM)
	{ HID_BLUETOOTH_DEVICE(0x58, 0x2000) },
	{ HID_BLUETOOTH_DEVICE(0x471, 0x2210) },
#endif
#if IS_ENABLED(CONFIG_HID_GREENASIA)
	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
#endif
#if IS_ENABLED(CONFIG_HID_GT683R)
	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
#endif
#if IS_ENABLED(CONFIG_HID_GYRATION)
	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_3) },
#endif
#if IS_ENABLED(CONFIG_HID_HOLTEK)
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A070) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
#endif
#if IS_ENABLED(CONFIG_HID_ICADE)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
#endif
#if IS_ENABLED(CONFIG_HID_JABRA)
	{ HID_USB_DEVICE(USB_VENDOR_ID_JABRA, HID_ANY_ID) },
#endif
#if IS_ENABLED(CONFIG_HID_KENSINGTON)
	{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
#endif
#if IS_ENABLED(CONFIG_HID_KEYTOUCH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_KEYTOUCH, USB_DEVICE_ID_KEYTOUCH_IEC) },
#endif
#if IS_ENABLED(CONFIG_HID_KYE)
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) },
#endif
#if IS_ENABLED(CONFIG_HID_LCPOWER)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000) },
#endif
#if IS_ENABLED(CONFIG_HID_LENOVO)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
#endif
#if IS_ENABLED(CONFIG_HID_LOGITECH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DUAL_ACTION) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FG) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_VIBRATION_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFP_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
#endif
#if IS_ENABLED(CONFIG_HID_LOGITECH_HIDPP)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
#endif
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
#endif
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE3) },
#endif
#if IS_ENABLED(CONFIG_HID_MICROSOFT)
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K_JP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE7K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
#endif
#if IS_ENABLED(CONFIG_HID_MONTEREY)
	{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
#endif
#if IS_ENABLED(CONFIG_HID_MULTITOUCH)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
#endif
#if IS_ENABLED(CONFIG_HID_WIIMOTE)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
#endif
#if IS_ENABLED(CONFIG_HID_NTI)
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) },
#endif
#if IS_ENABLED(CONFIG_HID_NTRIG)
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_4) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_5) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_6) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_7) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_8) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_9) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_10) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_11) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_12) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_13) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_14) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_15) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18) },
#endif
#if IS_ENABLED(CONFIG_HID_ORTEK)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_PKB1700) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_IHOME_IMAC_A210S) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
#endif
#if IS_ENABLED(CONFIG_HID_PANTHERLORD)
	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
#endif
#if IS_ENABLED(CONFIG_HID_PENMOUNT)
	{ HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_6000) },
#endif
#if IS_ENABLED(CONFIG_HID_PETALYNX)
	{ HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
#endif
#if IS_ENABLED(CONFIG_HID_PICOLCD)
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
#endif
#if IS_ENABLED(CONFIG_HID_PLANTRONICS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
#endif
#if IS_ENABLED(CONFIG_HID_PLAYSTATION)
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) },
#endif
#if IS_ENABLED(CONFIG_HID_PRIMAX)
	{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) },
#endif
#if IS_ENABLED(CONFIG_HID_PRODIKEYS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
#endif
#if IS_ENABLED(CONFIG_HID_RETRODE)
	{ HID_USB_DEVICE(USB_VENDOR_ID_FUTURE_TECHNOLOGY, USB_DEVICE_ID_RETRODE2) },
#endif
#if IS_ENABLED(CONFIG_HID_RMI)
	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_REZEL) },
#endif
#if IS_ENABLED(CONFIG_HID_ROCCAT)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKUFX) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPLUS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPURE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPURE_OPTICAL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEXTD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KOVAPLUS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_LUA) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRED) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRELESS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK_GLOW) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK_PRO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_SAVU) },
#endif
#if IS_ENABLED(CONFIG_HID_SAITEK)
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_PS1000) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7_OLD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT9) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7) },
#endif
#if IS_ENABLED(CONFIG_HID_SAMSUNG)
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
#endif
#if IS_ENABLED(CONFIG_HID_SMARTJOYPLUS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
#endif
#if IS_ENABLED(CONFIG_HID_SONY)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR5U_REMOTE) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR7U_REMOTE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_BDREMOTE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
#endif
#if IS_ENABLED(CONFIG_HID_SPEEDLINK)
	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
#endif
#if IS_ENABLED(CONFIG_HID_STEELSERIES)
	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_ARCTIS_1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_ARCTIS_9) },
#endif
#if IS_ENABLED(CONFIG_HID_SUNPLUS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
#endif
#if IS_ENABLED(CONFIG_HID_THRUSTMASTER)
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb324) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb605) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65d) },
#endif
#if IS_ENABLED(CONFIG_HID_TIVO)
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
#endif
#if IS_ENABLED(CONFIG_HID_TOPSEED)
	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
#endif
#if IS_ENABLED(CONFIG_HID_TWINHAN)
	{ HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
#endif
#if IS_ENABLED(CONFIG_HID_UDRAW_PS3)
	{ HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
#endif
#if IS_ENABLED(CONFIG_HID_XINMO)
	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
#endif
#if IS_ENABLED(CONFIG_HID_ZEROPLUS)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
#endif
#if IS_ENABLED(CONFIG_HID_ZYDACRON)
	{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
#endif
	{ }
};
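
/*
 * All of these tables are zero-terminated. A lookup such as
 * hid_match_id(hdev, hid_ignore_list) walks the table roughly like the
 * following sketch (based on the helper in hid-core.c, shown here only for
 * illustration):
 *
 *	for (; id->bus; id++)
 *		if (hid_match_one_id(hdev, id))
 *			return id;
 *	return NULL;
 *
 * so the terminating { } entry (bus == 0) ends the scan.
 */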
/* a list of devices that shouldn't be handled by HID core at all */
static const struct hid_device_id hid_ignore_list[] = {
	{ HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_FLAIR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_302) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ADS_TECH, USB_DEVICE_ID_ADS_TECH_RADIO_SI470X) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_01) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_10) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_20) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_21) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_22) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_23) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_24) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AIRCABLE, USB_DEVICE_ID_AIRCABLE1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_USBRS232) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_LCM)},
	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_LCM2)},
	{ HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_AXENTIA, USB_DEVICE_ID_AXENTIA_FM_RADIO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI4713) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM109) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_HIDCOM) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_ULTRAMOUSE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0001) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0002) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0004) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_SUPER_Q2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_GOGOPEN) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_PENPOWER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GRETAGMACBETH, USB_DEVICE_ID_GRETAGMACBETH_HUEY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_POWERMATE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_SOUNDKNOB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_RADIOSHARK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_90) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_100) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_101) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_103) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_104) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_105) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_106) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_107) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_108) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_200) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_201) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_202) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_203) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_204) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_205) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_206) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_207) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_300) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_301) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_302) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_303) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_304) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_305) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_306) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_307) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_308) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_309) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_400) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_401) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_402) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_403) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_404) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_405) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_500) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_501) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_502) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_503) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_504) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1000) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1001) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1002) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1003) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1004) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1005) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1006) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1007) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_IMATION, USB_DEVICE_ID_DISC_STAKKA) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_JABRA, USB_DEVICE_ID_JABRA_GN9350E) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KBGEAR, USB_DEVICE_ID_KBGEAR_JAMSTUDIO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KWORLD, USB_DEVICE_ID_KWORLD_RADIO_FM700) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_GPEN_560) },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_KYE, 0x0058) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYVOLTAGE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYCURRENT) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIC) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOTOR) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_ABSESP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_AUTODATABUS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MCT) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HYBRID) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HEATCONTROL) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_BEATPAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICK16F1454) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICK16F1454_V2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR, USB_DEVICE_ID_N_S_HARMONY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 20) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 30) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 100) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 108) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 118) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 200) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 300) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 400) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 500) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0001) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PETZL, USB_DEVICE_ID_PETZL_HEADLAMP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_PHILIPS, USB_DEVICE_ID_PHILIPS_IEEE802154_DONGLE) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SAI, USB_DEVICE_ID_CYPRESS_HIDCOM) },
#if IS_ENABLED(CONFIG_MOUSE_SYNAPTICS_USB)
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_TP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_INT_TP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_CPAD) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_STICK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_WP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_COMP_TP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_WTP) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DPAD) },
#endif
	{ HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) },
	{ }
};

/*
 * hid_mouse_ignore_list - mouse devices which should not be handled by the hid layer
 *
 * There are composite devices for which we want to ignore only a certain
 * interface. This is a list of devices for which only the mouse interface will
 * be ignored. This allows a dedicated driver to take care of the interface.
 */
static const struct hid_device_id hid_mouse_ignore_list[] = {
	/* appletouch driver */
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
	{ }
};

bool hid_ignore(struct hid_device *hdev)
{
	int i;

	if (hdev->quirks & HID_QUIRK_NO_IGNORE)
		return false;
	if (hdev->quirks & HID_QUIRK_IGNORE)
		return true;

	switch (hdev->vendor) {
	case USB_VENDOR_ID_CODEMERCS:
		/* ignore all Code Mercenaries IOWarrior devices */
		if (hdev->product >= USB_DEVICE_ID_CODEMERCS_IOW_FIRST &&
		    hdev->product <= USB_DEVICE_ID_CODEMERCS_IOW_LAST)
			return true;
		break;
	case USB_VENDOR_ID_LOGITECH:
		if (hdev->product >= USB_DEVICE_ID_LOGITECH_HARMONY_FIRST &&
		    hdev->product <= USB_DEVICE_ID_LOGITECH_HARMONY_LAST)
			return true;
		/*
		 * The Keene FM transmitter USB device has the same USB ID as
		 * the Logitech AudioHub Speaker, but it should ignore the hid.
		 * Check if the name is that of the Keene device.
		 * For reference: the name of the AudioHub is
		 * "HOLTEK AudioHub Speaker".
		 */
		if (hdev->product == USB_DEVICE_ID_LOGITECH_AUDIOHUB &&
		    !strcmp(hdev->name, "HOLTEK B-LINK USB Audio "))
			return true;
		break;
	case USB_VENDOR_ID_SOUNDGRAPH:
		if (hdev->product >= USB_DEVICE_ID_SOUNDGRAPH_IMON_FIRST &&
		    hdev->product <= USB_DEVICE_ID_SOUNDGRAPH_IMON_LAST)
			return true;
		break;
	case USB_VENDOR_ID_HANWANG:
		if (hdev->product >= USB_DEVICE_ID_HANWANG_TABLET_FIRST &&
		    hdev->product <= USB_DEVICE_ID_HANWANG_TABLET_LAST)
			return true;
		break;
	case USB_VENDOR_ID_JESS:
		if (hdev->product == USB_DEVICE_ID_JESS_YUREX &&
		    hdev->type == HID_TYPE_USBNONE)
			return true;
		break;
	case USB_VENDOR_ID_VELLEMAN:
		/* These are not HID devices. They are handled by comedi. */
		if ((hdev->product >= USB_DEVICE_ID_VELLEMAN_K8055_FIRST &&
		     hdev->product <= USB_DEVICE_ID_VELLEMAN_K8055_LAST) ||
		    (hdev->product >= USB_DEVICE_ID_VELLEMAN_K8061_FIRST &&
		     hdev->product <= USB_DEVICE_ID_VELLEMAN_K8061_LAST))
			return true;
		break;
	case USB_VENDOR_ID_ATMEL_V_USB:
		/* Masterkit MA901 usb radio based on Atmel tiny85 chip and
		 * it has the same USB ID as many Atmel V-USB devices. This
		 * usb radio is handled by radio-ma901.c driver so we want
		 * ignore the hid. Check the name, bus, product and ignore
		 * if we have MA901 usb radio.
		 */
		if (hdev->product == USB_DEVICE_ID_ATMEL_V_USB &&
		    hdev->bus == BUS_USB &&
		    strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0)
			return true;
		break;
	case USB_VENDOR_ID_ELAN:
		/*
		 * Blacklist of everything that gets handled by the elan_i2c
		 * input driver. This avoids disabling valid touchpads and
		 * other ELAN devices.
		 */
		if ((hdev->product == 0x0401 || hdev->product == 0x0400))
			for (i = 0; strlen(elan_acpi_id[i].id); ++i)
				if (!strncmp(hdev->name, elan_acpi_id[i].id,
					     strlen(elan_acpi_id[i].id)))
					return true;
		break;
	case USB_VENDOR_ID_JIELI_SDK_DEFAULT:
		/*
		 * Multiple USB devices with identical IDs (mic & touchscreen).
		 * The touch screen requires hid core processing, but the
		 * microphone does not. They can be distinguished by
		 * manufacturer and serial number.
		 */
		if (hdev->product == USB_DEVICE_ID_JIELI_SDK_4155 &&
		    strncmp(hdev->name, "SmartlinkTechnology", 19) == 0 &&
		    strncmp(hdev->uniq, "20201111000001", 14) == 0)
			return true;
		break;
	}

	if (hdev->type == HID_TYPE_USBMOUSE &&
	    hdev->quirks & HID_QUIRK_IGNORE_MOUSE)
		return true;

	return !!hid_match_id(hdev, hid_ignore_list);
}
EXPORT_SYMBOL_GPL(hid_ignore);

/* Dynamic HID quirks list - specified at runtime */
struct quirks_list_struct {
	struct hid_device_id hid_bl_item;
	struct list_head node;
};

static LIST_HEAD(dquirks_list);
static DEFINE_MUTEX(dquirks_lock);

/* Runtime ("dynamic") quirks manipulation functions */

/**
 * hid_exists_dquirk - find any dynamic quirks for a HID device
 * @hdev: the HID device to match
 *
 * Description:
 * Scans dquirks_list for a matching dynamic quirk and returns
 * the pointer to the relevant struct hid_device_id if found.
 * Must be called with dquirks_lock held.
 *
 * Return: NULL if no quirk found, struct hid_device_id * if found.
 */
static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev)
{
	struct quirks_list_struct *q;
	struct hid_device_id *bl_entry = NULL;

	list_for_each_entry(q, &dquirks_list, node) {
		if (hid_match_one_id(hdev, &q->hid_bl_item)) {
			bl_entry = &q->hid_bl_item;
			break;
		}
	}

	if (bl_entry != NULL)
		dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%04x:0x%04x\n",
			bl_entry->driver_data, bl_entry->vendor,
			bl_entry->product);

	return bl_entry;
}

/**
 * hid_modify_dquirk - add/replace a HID quirk
 * @id: the HID device to match
 * @quirks: the unsigned long quirks value to add/replace
 *
 * Description:
 * If a dynamic quirk exists in memory for this device, replace its
 * quirks value with what was provided. Otherwise, add the quirk
 * to the dynamic quirks list.
 *
 * Return: 0 OK, -error on failure.
 */
static int hid_modify_dquirk(const struct hid_device_id *id,
			     const unsigned long quirks)
{
	struct hid_device *hdev;
	struct quirks_list_struct *q_new, *q;
	int list_edited = 0;
	int ret = 0;

	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
	if (!hdev)
		return -ENOMEM;

	q_new = kmalloc(sizeof(struct quirks_list_struct), GFP_KERNEL);
	if (!q_new) {
		ret = -ENOMEM;
		goto out;
	}

	hdev->bus = q_new->hid_bl_item.bus = id->bus;
	hdev->group = q_new->hid_bl_item.group = id->group;
	hdev->vendor = q_new->hid_bl_item.vendor = id->vendor;
	hdev->product = q_new->hid_bl_item.product = id->product;
	q_new->hid_bl_item.driver_data = quirks;

	mutex_lock(&dquirks_lock);

	list_for_each_entry(q, &dquirks_list, node) {
		if (hid_match_one_id(hdev, &q->hid_bl_item)) {
			list_replace(&q->node, &q_new->node);
			kfree(q);
			list_edited = 1;
			break;
		}
	}

	if (!list_edited)
		list_add_tail(&q_new->node, &dquirks_list);

	mutex_unlock(&dquirks_lock);

out:
	kfree(hdev);
	return ret;
}
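
/*
 * Note on locking: dquirks_lock serializes every access to dquirks_list -
 * the replace-or-append logic in hid_modify_dquirk() above, the teardown in
 * hid_remove_all_dquirks() below, and the read path in hid_lookup_quirk(),
 * which consults the dynamic list before falling back to the static tables.
 */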
/**
 * hid_remove_all_dquirks - remove all runtime HID quirks from memory
 * @bus: bus to match against. Use HID_BUS_ANY if all need to be removed.
 *
 * Description:
 * Free all memory associated with dynamic quirks - called before
 * module unload.
 */
static void hid_remove_all_dquirks(__u16 bus)
{
	struct quirks_list_struct *q, *temp;

	mutex_lock(&dquirks_lock);
	list_for_each_entry_safe(q, temp, &dquirks_list, node) {
		if (bus == HID_BUS_ANY || bus == q->hid_bl_item.bus) {
			list_del(&q->node);
			kfree(q);
		}
	}
	mutex_unlock(&dquirks_lock);
}

/**
 * hid_quirks_init - apply HID quirks specified at module load time
 * @quirks_param: array of quirks strings (vendor:product:quirks)
 * @bus: bus type
 * @count: number of quirks to check
 */
int hid_quirks_init(char **quirks_param, __u16 bus, int count)
{
	struct hid_device_id id = { 0 };
	int n = 0, m;
	unsigned short int vendor, product;
	u32 quirks;

	id.bus = bus;

	for (; n < count && quirks_param[n]; n++) {
		m = sscanf(quirks_param[n], "0x%hx:0x%hx:0x%x",
			   &vendor, &product, &quirks);

		id.vendor = (__u16)vendor;
		id.product = (__u16)product;

		if (m != 3 || hid_modify_dquirk(&id, quirks) != 0) {
			pr_warn("Could not parse HID quirk module param %s\n",
				quirks_param[n]);
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(hid_quirks_init);

/**
 * hid_quirks_exit - release memory associated with dynamic_quirks
 * @bus: a bus to match against
 *
 * Description:
 * Release all memory associated with dynamic quirks for a given bus.
 * Called upon module unload.
 * Use HID_BUS_ANY to remove all dynamic quirks.
 *
 * Returns: nothing
 */
void hid_quirks_exit(__u16 bus)
{
	hid_remove_all_dquirks(bus);
}
EXPORT_SYMBOL_GPL(hid_quirks_exit);

/**
 * hid_gets_squirk - return any static quirks for a HID device
 * @hdev: the HID device to match
 *
 * Description:
 * Given a HID device, look up the static quirk tables above and return the
 * combined quirks value for that device (including its initial quirks).
 *
 * Return: the quirks.
 */
static unsigned long hid_gets_squirk(const struct hid_device *hdev)
{
	const struct hid_device_id *bl_entry;
	unsigned long quirks = hdev->initial_quirks;

	if (hid_match_id(hdev, hid_ignore_list))
		quirks |= HID_QUIRK_IGNORE;

	if (hid_match_id(hdev, hid_mouse_ignore_list))
		quirks |= HID_QUIRK_IGNORE_MOUSE;

	if (hid_match_id(hdev, hid_have_special_driver))
		quirks |= HID_QUIRK_HAVE_SPECIAL_DRIVER;

	bl_entry = hid_match_id(hdev, hid_quirks);
	if (bl_entry != NULL)
		quirks |= bl_entry->driver_data;

	if (quirks)
		dbg_hid("Found squirk 0x%lx for HID device 0x%04x:0x%04x\n",
			quirks, hdev->vendor, hdev->product);

	return quirks;
}

/**
 * hid_lookup_quirk - return any quirks associated with a HID device
 * @hdev: the HID device to look for
 *
 * Description:
 * Given a HID device, return any quirks associated with that device.
 *
 * Return: an unsigned long quirks value.
 */
unsigned long hid_lookup_quirk(const struct hid_device *hdev)
{
	unsigned long quirks = 0;
	const struct hid_device_id *quirk_entry = NULL;

	/* NCR devices must not be queried for reports */
	if (hdev->bus == BUS_USB &&
	    hdev->vendor == USB_VENDOR_ID_NCR &&
	    hdev->product >= USB_DEVICE_ID_NCR_FIRST &&
	    hdev->product <= USB_DEVICE_ID_NCR_LAST)
		return HID_QUIRK_NO_INIT_REPORTS;

	/* These devices must be ignored if version (bcdDevice) is too old */
	if (hdev->bus == BUS_USB && hdev->vendor == USB_VENDOR_ID_JABRA) {
		switch (hdev->product) {
		case USB_DEVICE_ID_JABRA_SPEAK_410:
			if (hdev->version < 0x0111)
				return HID_QUIRK_IGNORE;
			break;
		case USB_DEVICE_ID_JABRA_SPEAK_510:
			if (hdev->version < 0x0214)
				return HID_QUIRK_IGNORE;
			break;
		}
	}

	mutex_lock(&dquirks_lock);
	quirk_entry = hid_exists_dquirk(hdev);
	if (quirk_entry)
		quirks = quirk_entry->driver_data;
	else
		quirks = hid_gets_squirk(hdev);
	mutex_unlock(&dquirks_lock);

	return quirks;
}
EXPORT_SYMBOL_GPL(hid_lookup_quirk);
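
/*
 * Usage sketch (illustrative, not taken from this file): each string handed
 * to hid_quirks_init() must match the "0x%hx:0x%hx:0x%x" format parsed
 * above, i.e. vendor:product:quirks in hex. Assuming the usbhid module's
 * "quirks" parameter, which feeds this function for BUS_USB, a hypothetical
 * device could be given a quirk mask like this:
 *
 *	modprobe usbhid quirks=0x1234:0xbeef:0x00000040
 *
 * (0x40 is HID_QUIRK_MULTI_INPUT in current mainline; the vendor:product
 * pair here is made up.) Each such string ends up in hid_modify_dquirk()
 * as a dynamic quirk.
 */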
// SPDX-License-Identifier: GPL-2.0
#include <linux/proc_fs.h>
#include <linux/ethtool.h>
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/bonding.h>

#include "bonding_priv.h"

static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct bonding *bond = pde_data(file_inode(seq->file));
	struct list_head *iter;
	struct slave *slave;
	loff_t off = 0;

	rcu_read_lock();

	if (*pos == 0)
		return SEQ_START_TOKEN;

	bond_for_each_slave_rcu(bond, slave, iter)
		if (++off == *pos)
			return slave;

	return NULL;
}

static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bonding *bond = pde_data(file_inode(seq->file));
	struct list_head *iter;
	struct slave *slave;
	bool found = false;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return bond_first_slave_rcu(bond);

	bond_for_each_slave_rcu(bond, slave, iter) {
		if (found)
			return slave;
		if (slave == v)
			found = true;
	}

	return NULL;
}

static void bond_info_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
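
/*
 * The three callbacks above implement the seq_file iterator protocol:
 * bond_info_seq_start() takes the RCU read lock and maps *pos to either
 * SEQ_START_TOKEN (the header/master section) or the pos'th slave,
 * bond_info_seq_next() advances to the following slave, and
 * bond_info_seq_stop() drops the lock once the walk is complete.
 */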
"up" : "down"); seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); seq_printf(seq, "Up Delay (ms): %d\n", bond->params.updelay * bond->params.miimon); seq_printf(seq, "Down Delay (ms): %d\n", bond->params.downdelay * bond->params.miimon); seq_printf(seq, "Peer Notification Delay (ms): %d\n", bond->params.peer_notif_delay * bond->params.miimon); /* ARP information */ if (bond->params.arp_interval > 0) { int printed = 0; seq_printf(seq, "ARP Polling Interval (ms): %d\n", bond->params.arp_interval); seq_printf(seq, "ARP Missed Max: %u\n", bond->params.missed_max); seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { if (!bond->params.arp_targets[i]) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); printed = 1; } seq_printf(seq, "\n"); #if IS_ENABLED(CONFIG_IPV6) printed = 0; seq_printf(seq, "NS IPv6 target/s (xx::xx form):"); for (i = 0; (i < BOND_MAX_NS_TARGETS); i++) { if (ipv6_addr_any(&bond->params.ns_targets[i])) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI6c", &bond->params.ns_targets[i]); printed = 1; } seq_printf(seq, "\n"); #endif } if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); seq_printf(seq, "LACP active: %s\n", (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); optval = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); if (capable(CAP_NET_ADMIN)) { seq_printf(seq, "System priority: %d\n", BOND_AD_INFO(bond).system.sys_priority); seq_printf(seq, "System MAC address: %pM\n", &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", bond->dev->name); } else { seq_printf(seq, "Active Aggregator Info:\n"); seq_printf(seq, "\tAggregator ID: %d\n", ad_info.aggregator_id); seq_printf(seq, "\tNumber of ports: %d\n", ad_info.ports); seq_printf(seq, "\tActor Key: %d\n", ad_info.actor_key); seq_printf(seq, "\tPartner Key: %d\n", ad_info.partner_key); seq_printf(seq, "\tPartner Mac Address: %pM\n", ad_info.partner_system); } } } } static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { struct bonding *bond = pde_data(file_inode(seq->file)); seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); if (slave->speed == SPEED_UNKNOWN) seq_printf(seq, "Speed: %s\n", "Unknown"); else seq_printf(seq, "Speed: %d Mbps\n", slave->speed); if (slave->duplex == DUPLEX_UNKNOWN) seq_printf(seq, "Duplex: %s\n", "Unknown"); else seq_printf(seq, "Duplex: %s\n", slave->duplex ? 
"full" : "half"); seq_printf(seq, "Link Failure Count: %u\n", slave->link_failure_count); seq_printf(seq, "Permanent HW addr: %*phC\n", slave->dev->addr_len, slave->perm_hwaddr); seq_printf(seq, "Slave queue ID: %d\n", READ_ONCE(slave->queue_id)); if (BOND_MODE(bond) == BOND_MODE_8023AD) { const struct port *port = &SLAVE_AD_INFO(slave)->port; const struct aggregator *agg = port->aggregator; if (agg) { seq_printf(seq, "Aggregator ID: %d\n", agg->aggregator_identifier); seq_printf(seq, "Actor Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_actor_state)); seq_printf(seq, "Partner Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_partner_state)); seq_printf(seq, "Actor Churned Count: %d\n", port->churn_actor_count); seq_printf(seq, "Partner Churned Count: %d\n", port->churn_partner_count); if (capable(CAP_NET_ADMIN)) { seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); seq_printf(seq, " system mac address: %pM\n", &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", port->actor_port_priority); seq_printf(seq, " port number: %d\n", port->actor_port_number); seq_printf(seq, " port state: %d\n", port->actor_oper_port_state); seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); seq_printf(seq, " system mac address: %pM\n", &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", port->partner_oper.port_priority); seq_printf(seq, " port number: %d\n", port->partner_oper.port_number); seq_printf(seq, " port state: %d\n", port->partner_oper.port_state); } } else { seq_puts(seq, "Aggregator ID: N/A\n"); } } } static int bond_info_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_printf(seq, "%s\n", bond_version); bond_info_show_master(seq); } else bond_info_show_slave(seq, v); return 0; } static const struct seq_operations bond_info_seq_ops = { .start = bond_info_seq_start, .next = bond_info_seq_next, .stop = bond_info_seq_stop, .show = bond_info_seq_show, }; void bond_create_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir) { bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444, bn->proc_dir, &bond_info_seq_ops, bond); if (bond->proc_entry == NULL) netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n", DRV_NAME, bond_dev->name); else memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } } void bond_remove_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir && bond->proc_entry) { remove_proc_entry(bond->proc_file_name, bn->proc_dir); memset(bond->proc_file_name, 0, IFNAMSIZ); bond->proc_entry = NULL; } } /* Create the bonding directory under /proc/net, if doesn't exist yet. * Caller must hold rtnl_lock. */ void __net_init bond_create_proc_dir(struct bond_net *bn) { if (!bn->proc_dir) { bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); if (!bn->proc_dir) pr_warn("Warning: Cannot create /proc/net/%s\n", DRV_NAME); } } /* Destroy the bonding directory under /proc/net, if empty. */ void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { if (bn->proc_dir) { remove_proc_entry(DRV_NAME, bn->net->proc_net); bn->proc_dir = NULL; } } |
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Queue of folios definitions * * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See: * * Documentation/core-api/folio_queue.rst * * for a description of the API. */ #ifndef _LINUX_FOLIO_QUEUE_H #define _LINUX_FOLIO_QUEUE_H #include <linux/pagevec.h> #include <linux/mm.h> /* * Segment in a queue of running buffers. Each segment can hold a number of * folios and a portion of the queue can be referenced with the ITER_FOLIOQ * iterator. The possibility exists of inserting non-folio elements into the * queue (such as gaps). * * Explicit prev and next pointers are used instead of a list_head to make it * easier to add segments to tail and remove them from the head without the * need for a lock. */ struct folio_queue { struct folio_batch vec; /* Folios in the queue segment */ u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ struct folio_queue *next; /* Next queue segment or NULL */ struct folio_queue *prev; /* Previous queue segment or NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif unsigned int rreq_id; unsigned int debug_id; }; /** * folioq_init - Initialise a folio queue segment * @folioq: The segment to initialise * @rreq_id: The request identifier to use in tracelines. * * Initialise a folio queue segment and set an identifier to be used in traces. * * Note that the folio pointers are left uninitialised. */ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) { folio_batch_init(&folioq->vec); folioq->next = NULL; folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; folioq->rreq_id = rreq_id; folioq->debug_id = 0; } /** * folioq_nr_slots: Query the capacity of a folio queue segment * @folioq: The segment to query * * Query the number of folios that a particular folio queue segment might hold. * [!] NOTE: This must not be assumed to be the same for every segment! */ static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) { return PAGEVEC_SIZE; } /** * folioq_count: Query the occupancy of a folio queue segment * @folioq: The segment to query * * Query the number of folios that have been added to a folio queue segment. * Note that this is not decreased as folios are removed from a segment.
*/ static inline unsigned int folioq_count(struct folio_queue *folioq) { return folio_batch_count(&folioq->vec); } /** * folioq_full: Query if a folio queue segment is full * @folioq: The segment to query * * Query if a folio queue segment is fully occupied. Note that this does not * change if folios are removed from a segment. */ static inline bool folioq_full(struct folio_queue *folioq) { //return !folio_batch_space(&folioq->vec); return folioq_count(folioq) >= folioq_nr_slots(folioq); } /** * folioq_is_marked: Check first folio mark in a folio queue segment * @folioq: The segment to query * @slot: The slot number of the folio to query * * Determine if the first mark is set for the folio in the specified slot in a * folio queue segment. */ static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks); } /** * folioq_mark: Set the first mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Set the first mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks); } /** * folioq_unmark: Clear the first mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Clear the first mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks); } /** * folioq_is_marked2: Check second folio mark in a folio queue segment * @folioq: The segment to query * @slot: The slot number of the folio to query * * Determine if the second mark is set for the folio in the specified slot in a * folio queue segment. */ static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) { return test_bit(slot, &folioq->marks2); } /** * folioq_mark2: Set the second mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Set the second mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) { set_bit(slot, &folioq->marks2); } /** * folioq_unmark2: Clear the second mark on a folio in a folio queue segment * @folioq: The segment to modify * @slot: The slot number of the folio to modify * * Clear the second mark for the folio in the specified slot in a folio queue * segment. */ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) { clear_bit(slot, &folioq->marks2); } /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to * @folio: The folio to add * * Add a folio to the tail of the sequence in a folio queue segment, increasing * the occupancy count and returning the slot number for the folio just added. * The folio size is extracted and stored in the queue and the marks are left * unmodified. * * Note that it's left up to the caller to check that the segment capacity will * not be exceeded and to extend the queue. 
*/ static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; folioq->orders[slot] = folio_order(folio); return slot; } /** * folioq_append_mark: Add a folio to a folio queue segment * @folioq: The segment to add to * @folio: The folio to add * * Add a folio to the tail of the sequence in a folio queue segment, increasing * the occupancy count and returning the slot number for the folio just added. * The folio size is extracted and stored in the queue, the first mark is set * and the second mark is left unmodified. * * Note that it's left up to the caller to check that the segment capacity will * not be exceeded and to extend the queue. */ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) { unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; folioq->orders[slot] = folio_order(folio); folioq_mark(folioq, slot); return slot; } /** * folioq_folio: Get a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the folio in the specified slot from a folio queue segment. Note * that no bounds check is made and if the slot hasn't been added into yet, the * pointer will be undefined. If the slot has been cleared, NULL will be * returned. */ static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) { return folioq->vec.folios[slot]; } /** * folioq_folio_order: Get the order of a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the order of the folio in the specified slot from a folio queue * segment. Note that no bounds check is made and if the slot hasn't been * added into yet, the order returned will be 0. */ static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) { return folioq->orders[slot]; } /** * folioq_folio_size: Get the size of a folio from a folio queue segment * @folioq: The segment to access * @slot: The folio slot to access * * Retrieve the size of the folio in the specified slot from a folio queue * segment. Note that no bounds check is made and if the slot hasn't been * added into yet, the size returned will be PAGE_SIZE. */ static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) { return PAGE_SIZE << folioq_folio_order(folioq, slot); } /** * folioq_clear: Clear a folio from a folio queue segment * @folioq: The segment to clear * @slot: The folio slot to clear * * Clear a folio from a sequence in a folio queue segment and clear its marks. * The occupancy count is left unchanged. */ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) { folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */
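/*
 * Illustrative usage sketch (not part of this header): fill one segment
 * and sum the sizes of the marked folios. It assumes the caller holds
 * references on the folios and, as the append helpers above require,
 * checks for a full segment itself. The function name is hypothetical.
 */
static inline size_t folioq_fill_example(struct folio_queue *folioq,
					 struct folio **folios,
					 unsigned int nr)
{
	unsigned int i, slot;
	size_t total = 0;

	folioq_init(folioq, 0);		/* rreq_id 0: nothing to trace */
	for (i = 0; i < nr; i++) {
		if (folioq_full(folioq))
			break;		/* a real caller would chain a new segment */
		slot = folioq_append(folioq, folios[i]);
		folioq_mark(folioq, slot);	/* e.g. flag "not yet processed" */
	}
	for (slot = 0; slot < folioq_count(folioq); slot++)
		if (folioq_is_marked(folioq, slot))
			total += folioq_folio_size(folioq, slot);
	return total;
}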
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/dir.c * * Copyright (C) 1992 Rick Sladkey * * nfs directory handling functions * * 10 Apr 1996 Added silly rename for unlink --okir * 28 Sep 1996 Improved directory cache --okir * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de * Re-implemented silly rename for unlink, newly implemented * silly rename for nfs_rename() following the suggestions * of Olaf Kirch (okir) found in this file. * Following Linus comments on my original hack, this version * depends only on the dcache stuff and doesn't touch the inode * layer (iput() and friends). * 6 Jun 1999 Cache readdir lookups in the page cache.
-DaveM */ #include <linux/compat.h> #include <linux/module.h> #include <linux/time.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/swap.h> #include <linux/sched.h> #include <linux/kmemleak.h> #include <linux/xattr.h> #include <linux/hash.h> #include "delegation.h" #include "iostat.h" #include "internal.h" #include "fscache.h" #include "nfstrace.h" /* #define NFS_DEBUG_VERBOSE 1 */ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct folio *); static int nfs_do_create(struct inode *dir, struct dentry *dentry, umode_t mode, int open_flags); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, .iterate_shared = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, }; const struct address_space_operations nfs_dir_aops = { .free_folio = nfs_readdir_clear_array, }; #define NFS_INIT_DTSIZE PAGE_SIZE static struct nfs_open_dir_context * alloc_nfs_open_dir_context(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); if (ctx != NULL) { ctx->attr_gencount = nfsi->attr_gencount; ctx->dtsize = NFS_INIT_DTSIZE; spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) nfs_set_cache_invalid(dir, NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf)); spin_unlock(&dir->i_lock); return ctx; } return ERR_PTR(-ENOMEM); } static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) { spin_lock(&dir->i_lock); list_del_rcu(&ctx->list); spin_unlock(&dir->i_lock); kfree_rcu(ctx, rcu_head); } /* * Open file */ static int nfs_opendir(struct inode *inode, struct file *filp) { int res = 0; struct nfs_open_dir_context *ctx; dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); ctx = alloc_nfs_open_dir_context(inode); if (IS_ERR(ctx)) { res = PTR_ERR(ctx); goto out; } filp->private_data = ctx; out: return res; } static int nfs_closedir(struct inode *inode, struct file *filp) { put_nfs_open_dir_context(file_inode(filp), filp->private_data); return 0; } struct nfs_cache_array_entry { u64 cookie; u64 ino; const char *name; unsigned int name_len; unsigned char d_type; }; struct nfs_cache_array { u64 change_attr; u64 last_cookie; unsigned int size; unsigned char folio_full : 1, folio_is_eof : 1, cookies_are_ordered : 1; struct nfs_cache_array_entry array[] __counted_by(size); }; struct nfs_readdir_descriptor { struct file *file; struct folio *folio; struct dir_context *ctx; pgoff_t folio_index; pgoff_t folio_index_max; u64 dir_cookie; u64 last_cookie; loff_t current_index; __be32 verf[NFS_DIR_VERIFIER_SIZE]; unsigned long dir_verifier; unsigned long timestamp; unsigned long gencount; unsigned long attr_gencount; unsigned int cache_entry_index; 
unsigned int buffer_fills; unsigned int dtsize; bool clear_cache; bool plus; bool eob; bool eof; }; static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz) { struct nfs_server *server = NFS_SERVER(file_inode(desc->file)); unsigned int maxsize = server->dtsize; if (sz > maxsize) sz = maxsize; if (sz < NFS_MIN_FILE_IO_SIZE) sz = NFS_MIN_FILE_IO_SIZE; desc->dtsize = sz; } static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc) { nfs_set_dtsize(desc, desc->dtsize >> 1); } static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc) { nfs_set_dtsize(desc, desc->dtsize << 1); } static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie, u64 change_attr) { struct nfs_cache_array *array; array = kmap_local_folio(folio, 0); array->change_attr = change_attr; array->last_cookie = last_cookie; array->size = 0; array->folio_full = 0; array->folio_is_eof = 0; array->cookies_are_ordered = 1; kunmap_local(array); } /* * we are freeing strings created by nfs_add_to_readdir_array() */ static void nfs_readdir_clear_array(struct folio *folio) { struct nfs_cache_array *array; unsigned int i; array = kmap_local_folio(folio, 0); for (i = 0; i < array->size; i++) kfree(array->array[i].name); array->size = 0; kunmap_local(array); } static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie, u64 change_attr) { nfs_readdir_clear_array(folio); nfs_readdir_folio_init_array(folio, last_cookie, change_attr); } static struct folio * nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags) { struct folio *folio = folio_alloc(gfp_flags, 0); if (folio) nfs_readdir_folio_init_array(folio, last_cookie, 0); return folio; } static void nfs_readdir_folio_array_free(struct folio *folio) { if (folio) { nfs_readdir_clear_array(folio); folio_put(folio); } } static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array) { return array->size == 0 ? array->last_cookie : array->array[0].cookie; } static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) { array->folio_is_eof = 1; array->folio_full = 1; } static bool nfs_readdir_array_is_full(struct nfs_cache_array *array) { return array->folio_full; } /* * the caller is responsible for freeing qstr.name * when called by nfs_readdir_add_to_array, the strings will be freed in * nfs_clear_readdir_array() */ static const char *nfs_readdir_copy_name(const char *name, unsigned int len) { const char *ret = kmemdup_nul(name, len, GFP_KERNEL); /* * Avoid a kmemleak false positive. The pointer to the name is stored * in a page cache page which kmemleak does not scan. 
*/ if (ret != NULL) kmemleak_not_leak(ret); return ret; } static size_t nfs_readdir_array_maxentries(void) { return (PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry); } /* * Check that the next array entry lies entirely within the page bounds */ static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) { if (array->folio_full) return -ENOSPC; if (array->size == nfs_readdir_array_maxentries()) { array->folio_full = 1; return -ENOSPC; } return 0; } static int nfs_readdir_folio_array_append(struct folio *folio, const struct nfs_entry *entry, u64 *cookie) { struct nfs_cache_array *array; struct nfs_cache_array_entry *cache_entry; const char *name; int ret = -ENOMEM; name = nfs_readdir_copy_name(entry->name, entry->len); array = kmap_local_folio(folio, 0); if (!name) goto out; ret = nfs_readdir_array_can_expand(array); if (ret) { kfree(name); goto out; } array->size++; cache_entry = &array->array[array->size - 1]; cache_entry->cookie = array->last_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; cache_entry->name_len = entry->len; cache_entry->name = name; array->last_cookie = entry->cookie; if (array->last_cookie <= cache_entry->cookie) array->cookies_are_ordered = 0; if (entry->eof != 0) nfs_readdir_array_set_eof(array); out: *cookie = array->last_cookie; kunmap_local(array); return ret; } #define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14) /* * Hash algorithm allowing content addressable access to sequences * of directory cookies. Content is addressed by the value of the * cookie index of the first readdir entry in a page. * * We select only the first 18 bits to avoid issues with excessive * memory use for the page cache XArray. 18 bits should allow the caching * of 262144 pages of sequences of readdir entries. Since each page holds * 127 readdir entries for a typical 64-bit system, that works out to a * cache of ~ 33 million entries per directory.
*/ static pgoff_t nfs_readdir_folio_cookie_hash(u64 cookie) { if (cookie == 0) return 0; return hash_64(cookie, 18); } static bool nfs_readdir_folio_validate(struct folio *folio, u64 last_cookie, u64 change_attr) { struct nfs_cache_array *array = kmap_local_folio(folio, 0); int ret = true; if (array->change_attr != change_attr) ret = false; if (nfs_readdir_array_index_cookie(array) != last_cookie) ret = false; kunmap_local(array); return ret; } static void nfs_readdir_folio_unlock_and_put(struct folio *folio) { folio_unlock(folio); folio_put(folio); } static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie, u64 change_attr) { if (folio_test_uptodate(folio)) { if (nfs_readdir_folio_validate(folio, cookie, change_attr)) return; nfs_readdir_clear_array(folio); } nfs_readdir_folio_init_array(folio, cookie, change_attr); folio_mark_uptodate(folio); } static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping, u64 cookie, u64 change_attr) { pgoff_t index = nfs_readdir_folio_cookie_hash(cookie); struct folio *folio; folio = filemap_grab_folio(mapping, index); if (IS_ERR(folio)) return NULL; nfs_readdir_folio_init_and_validate(folio, cookie, change_attr); return folio; } static u64 nfs_readdir_folio_last_cookie(struct folio *folio) { struct nfs_cache_array *array; u64 ret; array = kmap_local_folio(folio, 0); ret = array->last_cookie; kunmap_local(array); return ret; } static bool nfs_readdir_folio_needs_filling(struct folio *folio) { struct nfs_cache_array *array; bool ret; array = kmap_local_folio(folio, 0); ret = !nfs_readdir_array_is_full(array); kunmap_local(array); return ret; } static void nfs_readdir_folio_set_eof(struct folio *folio) { struct nfs_cache_array *array; array = kmap_local_folio(folio, 0); nfs_readdir_array_set_eof(array); kunmap_local(array); } static struct folio *nfs_readdir_folio_get_next(struct address_space *mapping, u64 cookie, u64 change_attr) { pgoff_t index = nfs_readdir_folio_cookie_hash(cookie); struct folio *folio; folio = __filemap_get_folio(mapping, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return NULL; nfs_readdir_folio_init_and_validate(folio, cookie, change_attr); if (nfs_readdir_folio_last_cookie(folio) != cookie) nfs_readdir_folio_reinit_array(folio, cookie, change_attr); return folio; } static inline int is_32bit_api(void) { #ifdef CONFIG_COMPAT return in_compat_syscall(); #else return (BITS_PER_LONG == 32); #endif } static bool nfs_readdir_use_cookie(const struct file *filp) { if ((filp->f_mode & FMODE_32BITHASH) || (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) return false; return true; } static void nfs_readdir_seek_next_array(struct nfs_cache_array *array, struct nfs_readdir_descriptor *desc) { if (array->folio_full) { desc->last_cookie = array->last_cookie; desc->current_index += array->size; desc->cache_entry_index = 0; desc->folio_index++; } else desc->last_cookie = nfs_readdir_array_index_cookie(array); } static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc) { desc->current_index = 0; desc->last_cookie = 0; desc->folio_index = 0; } static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, struct nfs_readdir_descriptor *desc) { loff_t diff = desc->ctx->pos - desc->current_index; unsigned int index; if (diff < 0) goto out_eof; if (diff >= array->size) { if (array->folio_is_eof) goto out_eof; nfs_readdir_seek_next_array(array, desc); return -EAGAIN; } index = (unsigned int)diff; desc->dir_cookie = 
array->array[index].cookie; desc->cache_entry_index = index; return 0; out_eof: desc->eof = true; return -EBADCOOKIE; } static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array, u64 cookie) { if (!array->cookies_are_ordered) return true; /* Optimisation for monotonically increasing cookies */ if (cookie >= array->last_cookie) return false; if (array->size && cookie < array->array[0].cookie) return false; return true; } static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, struct nfs_readdir_descriptor *desc) { unsigned int i; int status = -EAGAIN; if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie)) goto check_eof; for (i = 0; i < array->size; i++) { if (array->array[i].cookie == desc->dir_cookie) { if (nfs_readdir_use_cookie(desc->file)) desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos = desc->current_index + i; desc->cache_entry_index = i; return 0; } } check_eof: if (array->folio_is_eof) { status = -EBADCOOKIE; if (desc->dir_cookie == array->last_cookie) desc->eof = true; } else nfs_readdir_seek_next_array(array, desc); return status; } static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) { struct nfs_cache_array *array; int status; array = kmap_local_folio(desc->folio, 0); if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); else status = nfs_readdir_search_for_cookie(array, desc); kunmap_local(array); return status; } /* Fill a page with xdr information before transferring to the cache page */ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, __be32 *verf, u64 cookie, struct page **pages, size_t bufsize, __be32 *verf_res) { struct inode *inode = file_inode(desc->file); struct nfs_readdir_arg arg = { .dentry = file_dentry(desc->file), .cred = desc->file->f_cred, .verf = verf, .cookie = cookie, .pages = pages, .page_len = bufsize, .plus = desc->plus, }; struct nfs_readdir_res res = { .verf = verf_res, }; unsigned long timestamp, gencount; int error; again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); desc->dir_verifier = nfs_save_change_attribute(inode); error = NFS_PROTO(inode)->readdir(&arg, &res); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; desc->plus = arg.plus = false; goto again; } goto error; } desc->timestamp = timestamp; desc->gencount = gencount; error: return error; } static int xdr_decode(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct xdr_stream *xdr) { struct inode *inode = file_inode(desc->file); int error; error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus); if (error) return error; entry->fattr->time_start = desc->timestamp; entry->fattr->gencount = desc->gencount; return 0; } /* Match file and dirent using either filehandle or fileid * Note: caller is responsible for checking the fsid */ static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { struct inode *inode; struct nfs_inode *nfsi; if (d_really_is_negative(dentry)) return 0; inode = d_inode(dentry); if (is_bad_inode(inode) || NFS_STALE(inode)) return 0; nfsi = NFS_I(inode); if (entry->fattr->fileid != nfsi->fileid) return 0; if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0) return 0; return 1; } #define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL) static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, unsigned int cache_hits, unsigned int cache_misses) { if 
(!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS) return true; if (ctx->pos == 0 || cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD) return true; return false; } /* * This function is called by the getattr code to request the * use of readdirplus to accelerate any future lookups in the same * directory. */ void nfs_readdir_record_entry_cache_hit(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && S_ISDIR(dir->i_mode)) { rcu_read_lock(); list_for_each_entry_rcu (ctx, &nfsi->open_files, list) atomic_inc(&ctx->cache_hits); rcu_read_unlock(); } } /* * This function is mainly for use by nfs_getattr(). * * If this is an 'ls -l', we want to force use of readdirplus. */ void nfs_readdir_record_entry_cache_miss(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && S_ISDIR(dir->i_mode)) { rcu_read_lock(); list_for_each_entry_rcu (ctx, &nfsi->open_files, list) atomic_inc(&ctx->cache_misses); rcu_read_unlock(); } } static void nfs_lookup_advise_force_readdirplus(struct inode *dir, unsigned int flags) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) return; if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL)) return; nfs_readdir_record_entry_cache_miss(dir); } static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, unsigned long dir_verifier) { struct qstr filename = QSTR_INIT(entry->name, entry->len); DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct dentry *dentry; struct dentry *alias; struct inode *inode; int status; if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID)) return; if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID)) return; if (filename.len == 0) return; /* Validate that the name doesn't contain any illegal '\0' */ if (strnlen(filename.name, filename.len) != filename.len) return; /* ...or '/' */ if (strnchr(filename.name, filename.len, '/')) return; if (filename.name[0] == '.') { if (filename.len == 1) return; if (filename.len == 2 && filename.name[1] == '.') return; } filename.hash = full_name_hash(parent, filename.name, filename.len); dentry = d_lookup(parent, &filename); again: if (!dentry) { dentry = d_alloc_parallel(parent, &filename, &wq); if (IS_ERR(dentry)) return; } if (!d_in_lookup(dentry)) { /* Is there a mountpoint here? 
If so, just exit */ if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid, &entry->fattr->fsid)) goto out; if (nfs_same_file(dentry, entry)) { if (!entry->fh->size) goto out; nfs_set_verifier(dentry, dir_verifier); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) nfs_setsecurity(d_inode(dentry), entry->fattr); trace_nfs_readdir_lookup_revalidate(d_inode(parent), dentry, 0, status); goto out; } else { trace_nfs_readdir_lookup_revalidate_failed( d_inode(parent), dentry, 0); d_invalidate(dentry); dput(dentry); dentry = NULL; goto again; } } if (!entry->fh->size) { d_lookup_done(dentry); goto out; } inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); alias = d_splice_alias(inode, dentry); d_lookup_done(dentry); if (alias) { if (IS_ERR(alias)) goto out; dput(dentry); dentry = alias; } nfs_set_verifier(dentry, dir_verifier); trace_nfs_readdir_lookup(d_inode(parent), dentry, 0); out: dput(dentry); } static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct xdr_stream *stream) { int ret; if (entry->fattr->label) entry->fattr->label->len = NFS4_MAXLABELLEN; ret = xdr_decode(desc, entry, stream); if (ret || !desc->plus) return ret; nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier); return 0; } /* Perform conversion from xdr to cache array */ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct page **xdr_pages, unsigned int buflen, struct folio **arrays, size_t narrays, u64 change_attr) { struct address_space *mapping = desc->file->f_mapping; struct folio *new, *folio = *arrays; struct xdr_stream stream; struct folio *scratch; struct xdr_buf buf; u64 cookie; int status; scratch = folio_alloc(GFP_KERNEL, 0); if (scratch == NULL) return -ENOMEM; xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); xdr_set_scratch_folio(&stream, scratch); do { status = nfs_readdir_entry_decode(desc, entry, &stream); if (status != 0) break; status = nfs_readdir_folio_array_append(folio, entry, &cookie); if (status != -ENOSPC) continue; if (folio->mapping != mapping) { if (!--narrays) break; new = nfs_readdir_folio_array_alloc(cookie, GFP_KERNEL); if (!new) break; arrays++; *arrays = folio = new; } else { new = nfs_readdir_folio_get_next(mapping, cookie, change_attr); if (!new) break; if (folio != *arrays) nfs_readdir_folio_unlock_and_put(folio); folio = new; } desc->folio_index_max++; status = nfs_readdir_folio_array_append(folio, entry, &cookie); } while (!status && !entry->eof); switch (status) { case -EBADCOOKIE: if (!entry->eof) break; nfs_readdir_folio_set_eof(folio); fallthrough; case -EAGAIN: status = 0; break; case -ENOSPC: status = 0; if (!desc->plus) break; while (!nfs_readdir_entry_decode(desc, entry, &stream)) ; } if (folio != *arrays) nfs_readdir_folio_unlock_and_put(folio); folio_put(scratch); return status; } static void nfs_readdir_free_pages(struct page **pages, size_t npages) { while (npages--) put_page(pages[npages]); kfree(pages); } /* * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call * to nfs_readdir_free_pages() */ static struct page **nfs_readdir_alloc_pages(size_t npages) { struct page **pages; size_t i; pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL); if (!pages) return NULL; for (i = 0; i < npages; i++) { struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) goto out_freepages; pages[i] = page; } return pages; out_freepages: nfs_readdir_free_pages(pages, i); return NULL; } static int 
nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, __be32 *verf_arg, __be32 *verf_res, struct folio **arrays, size_t narrays) { u64 change_attr; struct page **pages; struct folio *folio = *arrays; struct nfs_entry *entry; size_t array_size; struct inode *inode = file_inode(desc->file); unsigned int dtsize = desc->dtsize; unsigned int pglen; int status = -ENOMEM; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->cookie = nfs_readdir_folio_last_cookie(folio); entry->fh = nfs_alloc_fhandle(); entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); entry->server = NFS_SERVER(inode); if (entry->fh == NULL || entry->fattr == NULL) goto out; array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = nfs_readdir_alloc_pages(array_size); if (!pages) goto out; change_attr = inode_peek_iversion_raw(inode); status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages, dtsize, verf_res); if (status < 0) goto free_pages; pglen = status; if (pglen != 0) status = nfs_readdir_folio_filler(desc, entry, pages, pglen, arrays, narrays, change_attr); else nfs_readdir_folio_set_eof(folio); desc->buffer_fills++; free_pages: nfs_readdir_free_pages(pages, array_size); out: nfs_free_fattr(entry->fattr); nfs_free_fhandle(entry->fh); kfree(entry); return status; } static void nfs_readdir_folio_put(struct nfs_readdir_descriptor *desc) { folio_put(desc->folio); desc->folio = NULL; } static void nfs_readdir_folio_unlock_and_put_cached(struct nfs_readdir_descriptor *desc) { folio_unlock(desc->folio); nfs_readdir_folio_put(desc); } static struct folio * nfs_readdir_folio_get_cached(struct nfs_readdir_descriptor *desc) { struct address_space *mapping = desc->file->f_mapping; u64 change_attr = inode_peek_iversion_raw(mapping->host); u64 cookie = desc->last_cookie; struct folio *folio; folio = nfs_readdir_folio_get_locked(mapping, cookie, change_attr); if (!folio) return NULL; if (desc->clear_cache && !nfs_readdir_folio_needs_filling(folio)) nfs_readdir_folio_reinit_array(folio, cookie, change_attr); return folio; } /* * Returns 0 if desc->dir_cookie was found on page desc->page_index * and locks the page to prevent removal from the page cache. 
*/ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) { struct inode *inode = file_inode(desc->file); struct nfs_inode *nfsi = NFS_I(inode); __be32 verf[NFS_DIR_VERIFIER_SIZE]; int res; desc->folio = nfs_readdir_folio_get_cached(desc); if (!desc->folio) return -ENOMEM; if (nfs_readdir_folio_needs_filling(desc->folio)) { /* Grow the dtsize if we had to go back for more pages */ if (desc->folio_index == desc->folio_index_max) nfs_grow_dtsize(desc); desc->folio_index_max = desc->folio_index; trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf, desc->last_cookie, desc->folio->index, desc->dtsize); res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf, &desc->folio, 1); if (res < 0) { nfs_readdir_folio_unlock_and_put_cached(desc); trace_nfs_readdir_cache_fill_done(inode, res); if (res == -EBADCOOKIE || res == -ENOTSYNC) { invalidate_inode_pages2(desc->file->f_mapping); nfs_readdir_rewind_search(desc); trace_nfs_readdir_invalidate_cache_range( inode, 0, MAX_LFS_FILESIZE); return -EAGAIN; } return res; } /* * Set the cookie verifier if the page cache was empty */ if (desc->last_cookie == 0 && memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) { memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf)); invalidate_inode_pages2_range(desc->file->f_mapping, 1, -1); trace_nfs_readdir_invalidate_cache_range( inode, 1, MAX_LFS_FILESIZE); } desc->clear_cache = false; } res = nfs_readdir_search_array(desc); if (res == 0) return 0; nfs_readdir_folio_unlock_and_put_cached(desc); return res; } /* Search for desc->dir_cookie from the beginning of the page cache */ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) { int res; do { res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } #define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL) /* * Once we've found the start of the dirent within a page: fill 'er up... */ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, const __be32 *verf) { struct file *file = desc->file; struct nfs_cache_array *array; unsigned int i; bool first_emit = !desc->dir_cookie; array = kmap_local_folio(desc->folio, 0); for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; /* * nfs_readdir_handle_cache_misses return force clear at * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1 * entries need be emitted here. */ if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) { desc->eob = true; break; } ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->name, ent->name_len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { desc->eob = true; break; } memcpy(desc->verf, verf, sizeof(desc->verf)); if (i == array->size - 1) { desc->dir_cookie = array->last_cookie; nfs_readdir_seek_next_array(array, desc); } else { desc->dir_cookie = array->array[i + 1].cookie; desc->last_cookie = array->array[0].cookie; } if (nfs_readdir_use_cookie(file)) desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; } if (array->folio_is_eof) desc->eof = !desc->eob; kunmap_local(array); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n", (unsigned long long)desc->dir_cookie); } /* * If we cannot find a cookie in our cache, we suspect that this is * because it points to a deleted file, so we ask the server to return * whatever it thinks is the next entry. We then feed this to filldir. 
* If all goes well, we should then be able to find our way round the * cache on the next call to readdir_search_pagecache(); * * NOTE: we cannot add the anonymous page to the pagecache because * the data it contains might not be page aligned. Besides, * we should already have a complete representation of the * directory in the page cache by the time we get here. */ static int uncached_readdir(struct nfs_readdir_descriptor *desc) { struct folio **arrays; size_t i, sz = 512; __be32 verf[NFS_DIR_VERIFIER_SIZE]; int status = -ENOMEM; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n", (unsigned long long)desc->dir_cookie); arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL); if (!arrays) goto out; arrays[0] = nfs_readdir_folio_array_alloc(desc->dir_cookie, GFP_KERNEL); if (!arrays[0]) goto out; desc->folio_index = 0; desc->cache_entry_index = 0; desc->last_cookie = desc->dir_cookie; desc->folio_index_max = 0; trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie, -1, desc->dtsize); status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz); if (status < 0) { trace_nfs_readdir_uncached_done(file_inode(desc->file), status); goto out_free; } for (i = 0; !desc->eob && i < sz && arrays[i]; i++) { desc->folio = arrays[i]; nfs_do_filldir(desc, verf); } desc->folio = NULL; /* * Grow the dtsize if we have to go back for more pages, * or shrink it if we're reading too many. */ if (!desc->eof) { if (!desc->eob) nfs_grow_dtsize(desc); else if (desc->buffer_fills == 1 && i < (desc->folio_index_max >> 1)) nfs_shrink_dtsize(desc); } out_free: for (i = 0; i < sz && arrays[i]; i++) nfs_readdir_folio_array_free(arrays[i]); out: if (!nfs_readdir_use_cookie(desc->file)) nfs_readdir_rewind_search(desc); desc->folio_index_max = -1; kfree(arrays); dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; } static bool nfs_readdir_handle_cache_misses(struct inode *inode, struct nfs_readdir_descriptor *desc, unsigned int cache_misses, bool force_clear) { if (desc->ctx->pos == 0 || !desc->plus) return false; if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear) return false; trace_nfs_readdir_force_readdirplus(inode); return true; } /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. */ static int nfs_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_dir_context *dir_ctx = file->private_data; struct nfs_readdir_descriptor *desc; unsigned int cache_hits, cache_misses; bool force_clear; int res; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); /* * ctx->pos points to the dirent entry number. * *desc->dir_cookie has the cookie for the next entry. We have * to either find the entry with the appropriate number or * revalidate the cookie. 
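*
* The two position models, as updated by nfs_do_filldir() above:
*
*	if (nfs_readdir_use_cookie(file))
*		desc->ctx->pos = desc->dir_cookie;	/* pos is the cookie */
*	else
*		desc->ctx->pos++;			/* pos counts entries */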
*/ nfs_revalidate_mapping(inode, file->f_mapping); res = -ENOMEM; desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) goto out; desc->file = file; desc->ctx = ctx; desc->folio_index_max = -1; spin_lock(&file->f_lock); desc->dir_cookie = dir_ctx->dir_cookie; desc->folio_index = dir_ctx->page_index; desc->last_cookie = dir_ctx->last_cookie; desc->attr_gencount = dir_ctx->attr_gencount; desc->eof = dir_ctx->eof; nfs_set_dtsize(desc, dir_ctx->dtsize); memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0); cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0); force_clear = dir_ctx->force_clear; spin_unlock(&file->f_lock); if (desc->eof) { res = 0; goto out_free; } desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses); force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses, force_clear); desc->clear_cache = force_clear; do { res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ if (desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) continue; if (res == -EBADCOOKIE || res == -ENOTSYNC) res = 0; } break; } if (res == -ETOOSMALL && desc->plus) { nfs_zap_caches(inode); desc->plus = false; desc->eof = false; continue; } if (res < 0) break; nfs_do_filldir(desc, nfsi->cookieverf); nfs_readdir_folio_unlock_and_put_cached(desc); if (desc->folio_index == desc->folio_index_max) desc->clear_cache = force_clear; } while (!desc->eob && !desc->eof); spin_lock(&file->f_lock); dir_ctx->dir_cookie = desc->dir_cookie; dir_ctx->last_cookie = desc->last_cookie; dir_ctx->attr_gencount = desc->attr_gencount; dir_ctx->page_index = desc->folio_index; dir_ctx->force_clear = force_clear; dir_ctx->eof = desc->eof; dir_ctx->dtsize = desc->dtsize; memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); spin_unlock(&file->f_lock); out_free: kfree(desc); out: dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", filp, offset, whence); switch (whence) { default: return -EINVAL; case SEEK_SET: if (offset < 0) return -EINVAL; spin_lock(&filp->f_lock); break; case SEEK_CUR: if (offset == 0) return filp->f_pos; spin_lock(&filp->f_lock); offset += filp->f_pos; if (offset < 0) { spin_unlock(&filp->f_lock); return -EINVAL; } } if (offset != filp->f_pos) { filp->f_pos = offset; dir_ctx->page_index = 0; if (!nfs_readdir_use_cookie(filp)) { dir_ctx->dir_cookie = 0; dir_ctx->last_cookie = 0; } else { dir_ctx->dir_cookie = offset; dir_ctx->last_cookie = offset; } dir_ctx->eof = false; } spin_unlock(&filp->f_lock); return offset; } /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC); return 0; } /** * nfs_force_lookup_revalidate - Mark the directory as having changed * @dir: pointer to directory inode * * This forces the revalidation code in nfs_lookup_revalidate() to do a * full lookup on all child dentries of 'dir' whenever a change occurs * on the server that might have invalidated our dcache. 
* * Note that we reserve bit '0' as a tag to let us know when a dentry * was revalidated while holding a delegation on its inode. * * The caller should be holding dir->i_lock */ void nfs_force_lookup_revalidate(struct inode *dir) { NFS_I(dir)->cache_change_attribute += 2; } EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); /** * nfs_verify_change_attribute - Detects NFS remote directory changes * @dir: pointer to parent directory inode * @verf: previously saved change attribute * * Return "false" if the verifiers doesn't match the change attribute. * This would usually indicate that the directory contents have changed on * the server, and that any dentries need revalidating. */ static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf) { return (verf & ~1UL) == nfs_save_change_attribute(dir); } static void nfs_set_verifier_delegated(unsigned long *verf) { *verf |= 1UL; } #if IS_ENABLED(CONFIG_NFS_V4) static void nfs_unset_verifier_delegated(unsigned long *verf) { *verf &= ~1UL; } #endif /* IS_ENABLED(CONFIG_NFS_V4) */ static bool nfs_test_verifier_delegated(unsigned long verf) { return verf & 1; } static bool nfs_verifier_is_delegated(struct dentry *dentry) { return nfs_test_verifier_delegated(dentry->d_time); } static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); struct inode *dir = d_inode_rcu(dentry->d_parent); if (!dir || !nfs_verify_change_attribute(dir, verf)) return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)) nfs_set_verifier_delegated(&verf); dentry->d_time = verf; } /** * nfs_set_verifier - save a parent directory verifier in the dentry * @dentry: pointer to dentry * @verf: verifier to save * * Saves the parent directory verifier in @dentry. If the inode has * a delegation, we also tag the dentry as having been revalidated * while holding a delegation so that we know we don't have to * look it up again after a directory change. */ void nfs_set_verifier(struct dentry *dentry, unsigned long verf) { spin_lock(&dentry->d_lock); nfs_set_verifier_locked(dentry, verf); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL_GPL(nfs_set_verifier); #if IS_ENABLED(CONFIG_NFS_V4) /** * nfs_clear_verifier_delegated - clear the dir verifier delegation tag * @inode: pointer to inode * * Iterates through the dentries in the inode alias list and clears * the tag used to indicate that the dentry has been revalidated * while holding a delegation. * This function is intended for use when the delegation is being * returned or revoked. */ void nfs_clear_verifier_delegated(struct inode *inode) { struct dentry *alias; if (!inode) return; spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); nfs_unset_verifier_delegated(&alias->d_time); spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated); #endif /* IS_ENABLED(CONFIG_NFS_V4) */ static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) && d_really_is_negative(dentry)) return dentry->d_time == inode_peek_iversion_raw(dir); return nfs_verify_change_attribute(dir, dentry->d_time); } /* * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. * If rcu_walk prevents us from performing a full check, return 0. 
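*
* The verifier is kept in dentry->d_time, with the parent's change
* attribute in the high bits and bit 0 reserved as the delegation tag
* (see nfs_set_verifier_delegated() above), so a match is tested with
* the tag masked off:
*
*	match = ((dentry->d_time & ~1UL) == nfs_save_change_attribute(dir));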
*/ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, int rcu_walk) { if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) return 0; if (!nfs_dentry_verify_change(dir, dentry)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ if (nfs_mapping_need_revalidate_inode(dir)) { if (rcu_walk) return 0; if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) return 0; } if (!nfs_dentry_verify_change(dir, dentry)) return 0; return 1; } /* * Use intent information to check whether or not we're going to do * an O_EXCL create using this path component. */ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) == (LOOKUP_CREATE | LOOKUP_EXCL); } /* * Inode and filehandle revalidation for lookups. * * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, * or if the intent information indicates that we're about to open this * particular file and the "nocto" mount flag is not set. * */ static int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); int ret; if (IS_AUTOMOUNT(inode)) return 0; if (flags & LOOKUP_OPEN) { switch (inode->i_mode & S_IFMT) { case S_IFREG: /* A NFSv4 OPEN will revalidate later */ if (server->caps & NFS_CAP_ATOMIC_OPEN) goto out; fallthrough; case S_IFDIR: if (server->flags & NFS_MOUNT_NOCTO) break; /* NFS close-to-open cache consistency validation */ goto out_force; } } /* VFS wants an on-the-wire revalidation */ if (flags & LOOKUP_REVAL) goto out_force; out: if (inode->i_nlink > 0 || (inode->i_nlink == 0 && test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags))) return 0; else return -ESTALE; out_force: if (flags & LOOKUP_RCU) return -ECHILD; ret = __nfs_revalidate_inode(server, inode); if (ret != 0) return ret; goto out; } static void nfs_mark_dir_for_revalidate(struct inode *inode) { spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); spin_unlock(&inode->i_lock); } /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. * * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. * * If LOOKUP_RCU prevents us from performing a full check, return 1 * suggesting a reval is needed. * * Note that when creating a new file, or looking up a rename target, * then it shouldn't be necessary to revalidate a negative dentry. */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; /* Case insensitive server? Revalidate negative dentries */ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) return 1; return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } static int nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, struct inode *inode, int error) { switch (error) { case 1: break; case -ETIMEDOUT: if (inode && (IS_ROOT(dentry) || NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)) error = 1; break; case -ESTALE: case -ENOENT: error = 0; fallthrough; default: /* * We can't d_drop the root of a disconnected tree: * its d_hash is on the s_anon list and d_drop() would hide * it from shrink_dcache_for_unmount(), leading to busy * inodes on unmount and further oopses. 
*/ if (inode && IS_ROOT(dentry)) error = 1; break; } trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error); return error; } static int nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry, unsigned int flags) { int ret = 1; if (nfs_neg_need_reval(dir, dentry, flags)) { if (flags & LOOKUP_RCU) return -ECHILD; ret = 0; } return nfs_lookup_revalidate_done(dir, dentry, NULL, ret); } static int nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, struct inode *inode) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return nfs_lookup_revalidate_done(dir, dentry, inode, 1); } static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name, struct dentry *dentry, struct inode *inode, unsigned int flags) { struct nfs_fh *fhandle; struct nfs_fattr *fattr; unsigned long dir_verifier; int ret; trace_nfs_lookup_revalidate_enter(dir, dentry, flags); ret = -ENOMEM; fhandle = nfs_alloc_fhandle(); fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fhandle == NULL || fattr == NULL) goto out; dir_verifier = nfs_save_change_attribute(dir); ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr); if (ret < 0) goto out; /* Request help from readdirplus */ nfs_lookup_advise_force_readdirplus(dir, flags); ret = 0; if (nfs_compare_fh(NFS_FH(inode), fhandle)) goto out; if (nfs_refresh_inode(inode, fattr) < 0) goto out; nfs_setsecurity(inode, fattr); nfs_set_verifier(dentry, dir_verifier); ret = 1; out: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); /* * If the lookup failed despite the dentry change attribute being * a match, then we should revalidate the directory cache. */ if (!ret && nfs_dentry_verify_change(dir, dentry)) nfs_mark_dir_for_revalidate(dir); return nfs_lookup_revalidate_done(dir, dentry, inode, ret); } /* * This is called every time the dcache has a lookup hit, * and we should check whether we can really trust that * lookup. * * NOTE! The hit can be a negative hit too, don't assume * we have an inode! * * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. 
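*
* As for any ->d_revalidate(): returning 1 keeps the dentry, 0 drops
* it so a fresh lookup is done, and -ECHILD tells the VFS to retry in
* ref-walk mode because we could not check without blocking, e.g.:
*
*	if (flags & LOOKUP_RCU)
*		return -ECHILD;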
*/ static int nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { struct inode *inode; int error = 0; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = d_inode(dentry); if (!inode) return nfs_lookup_revalidate_negative(dir, dentry, flags); if (is_bad_inode(inode)) { dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); goto out_bad; } if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 && nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) goto out_bad; if (nfs_verifier_is_delegated(dentry)) return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* Force a full look up iff the parent directory has changed */ if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { error = nfs_lookup_verify_inode(inode, flags); if (error) { if (error == -ESTALE) nfs_mark_dir_for_revalidate(dir); goto out_bad; } goto out_valid; } if (flags & LOOKUP_RCU) return -ECHILD; if (NFS_STALE(inode)) goto out_bad; return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); out_valid: return nfs_lookup_revalidate_done(dir, dentry, inode, 1); out_bad: if (flags & LOOKUP_RCU) return -ECHILD; return nfs_lookup_revalidate_done(dir, dentry, inode, error); } static int __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) { if (dentry->d_fsdata == NFS_FSDATA_BLOCKED) return -ECHILD; } else { /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, smp_load_acquire(&dentry->d_fsdata) != NFS_FSDATA_BLOCKED); } return 0; } static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { if (__nfs_lookup_revalidate(dentry, flags)) return -ECHILD; return nfs_do_lookup_revalidate(dir, name, dentry, flags); } static void block_revalidate(struct dentry *dentry) { /* old devname - just in case */ kfree(dentry->d_fsdata); /* Any new reference that could lead to an open * will take ->d_lock in lookup_open() -> d_lookup(). * Holding this lock ensures we cannot race with * __nfs_lookup_revalidate() and removes and need * for further barriers. */ lockdep_assert_held(&dentry->d_lock); dentry->d_fsdata = NFS_FSDATA_BLOCKED; } static void unblock_revalidate(struct dentry *dentry) { store_release_wake_up(&dentry->d_fsdata, NULL); } /* * A weaker form of d_revalidate for revalidating just the d_inode(dentry) * when we don't really care about the dentry name. This is called when a * pathwalk ends on a dentry that was not found via a normal lookup in the * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals). * * In this situation, we just want to verify that the inode itself is OK * since the dentry might have changed on the server. */ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode = d_inode(dentry); int error = 0; /* * I believe we can only get a negative dentry here in the case of a * procfs-style symlink. Just assume it's correct for now, but we may * eventually need to do something more here. */ if (!inode) { dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n", __func__, dentry); return 1; } if (is_bad_inode(inode)) { dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); return 0; } error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? 
"invalid" : "valid"); return !error; } /* * This is called from dput() when d_count is going to 0. */ static int nfs_dentry_delete(const struct dentry *dentry) { dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n", dentry, dentry->d_flags); /* Unhash any dentry with a stale inode */ if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry))) return 1; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { /* Unhash it, so that ->d_iput() would be called */ return 1; } if (!(dentry->d_sb->s_flags & SB_ACTIVE)) { /* Unhash it, so that ancestors of killed async unlink * files will be cleaned up during umount */ return 1; } return 0; } /* Ensure that we revalidate inode->i_nlink */ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); /* drop the inode if we're reasonably sure this is the last link */ if (inode->i_nlink > 0) drop_nlink(inode); NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); nfs_set_cache_invalid( inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_NLINK); spin_unlock(&inode->i_lock); } /* * Called when the dentry loses inode. * We use it to clean up silly-renamed files. */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { nfs_complete_unlink(dentry, inode); nfs_drop_nlink(inode); } iput(inode); } static void nfs_d_release(struct dentry *dentry) { /* free cached devname value, if it survived that far */ if (unlikely(dentry->d_fsdata)) { if (dentry->d_flags & DCACHE_NFSFS_RENAMED) WARN_ON(1); else kfree(dentry->d_fsdata); } } const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; unsigned long dir_verifier; int error; dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen)) return ERR_PTR(-ENAMETOOLONG); /* * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. 
*/ if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET) return NULL; res = ERR_PTR(-ENOMEM); fhandle = nfs_alloc_fhandle(); fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir)); if (fhandle == NULL || fattr == NULL) goto out; dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, fhandle, fattr); if (error == -ENOENT) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); goto no_entry; } if (error < 0) { res = ERR_PTR(error); goto out; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); res = ERR_CAST(inode); if (IS_ERR(res)) goto out; /* Notify readdir to use READDIRPLUS */ nfs_lookup_advise_force_readdirplus(dir, flags); no_entry: res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) goto out; dentry = res; } nfs_set_verifier(dentry, dir_verifier); out: trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); return res; } EXPORT_SYMBOL_GPL(nfs_lookup); void nfs_d_prune_case_insensitive_aliases(struct inode *inode) { /* Case insensitive server? Revalidate dentries */ if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE)) d_prune_aliases(inode); } EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); #if IS_ENABLED(CONFIG_NFS_V4) static int nfs4_lookup_revalidate(struct inode *, const struct qstr *, struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; EXPORT_SYMBOL_GPL(nfs4_dentry_operations); static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); } static int do_open(struct inode *inode, struct file *filp) { nfs_fscache_open_file(inode, filp); return 0; } static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, struct file *file, unsigned open_flags) { int err; err = finish_open(file, dentry, do_open); if (err) goto out; if (S_ISREG(file_inode(file)->i_mode)) nfs_file_set_open_context(file, ctx); else err = -EOPENSTALE; out: return err; } int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; unsigned int lookup_flags = 0; unsigned long dir_verifier; bool switched = false; int created = 0; int err; /* Expect a negative dentry */ BUG_ON(d_inode(dentry)); dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); err = nfs_check_flags(open_flags); if (err) return err; /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { if (!d_in_lookup(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup * again */ return -ENOENT; } lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY; goto no_open; } if (dentry->d_name.len > NFS_SERVER(dir)->namelen) return -ENAMETOOLONG; if (open_flags & O_CREAT) { struct nfs_server *server = NFS_SERVER(dir); if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) mode &= ~current_umask(); attr.ia_valid 
|= ATTR_MODE; attr.ia_mode = mode; } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; } if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) { d_drop(dentry); switched = true; dentry = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (unlikely(!d_in_lookup(dentry))) return finish_no_open(file, dentry); } ctx = create_nfs_open_context(dentry, open_flags, file); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created); if (created) file->f_mode |= FMODE_CREATED; if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); d_drop(dentry); switch (err) { case -ENOENT: d_splice_alias(NULL, dentry); if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); else dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); break; case -EISDIR: case -ENOTDIR: goto no_open; case -ELOOP: if (!(open_flags & O_NOFOLLOW)) goto no_open; break; /* case -EINVAL: */ default: break; } goto out; } file->f_mode |= FMODE_CAN_ODIRECT; err = nfs_finish_open(ctx, ctx->dentry, file, open_flags); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); out: if (unlikely(switched)) { d_lookup_done(dentry); dput(dentry); } return err; no_open: res = nfs_lookup(dir, dentry, lookup_flags); if (!res) { inode = d_inode(dentry); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) res = ERR_PTR(-ENOTDIR); else if (inode && S_ISREG(inode->i_mode)) res = ERR_PTR(-EOPENSTALE); } else if (!IS_ERR(res)) { inode = d_inode(res); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { dput(res); res = ERR_PTR(-ENOTDIR); } else if (inode && S_ISREG(inode->i_mode)) { dput(res); res = ERR_PTR(-EOPENSTALE); } } if (switched) { d_lookup_done(dentry); if (!res) res = dentry; else dput(dentry); } return finish_no_open(file, res); } EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { struct inode *inode; if (__nfs_lookup_revalidate(dentry, flags)) return -ECHILD; trace_nfs_lookup_revalidate_enter(dir, dentry, flags); if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto full_reval; if (d_mountpoint(dentry)) goto full_reval; inode = d_inode(dentry); /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. 
*/ if (inode == NULL) goto full_reval; if (nfs_verifier_is_delegated(dentry)) return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto full_reval; /* We cannot do exclusive creation on a positive dentry */ if (flags & (LOOKUP_EXCL | LOOKUP_REVAL)) goto reval_dentry; /* Check if the directory changed */ if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) goto reval_dentry; /* Let f_op->open() actually open (and revalidate) the file */ return 1; reval_dentry: if (flags & LOOKUP_RCU) return -ECHILD; return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); full_reval: return nfs_do_lookup_revalidate(dir, name, dentry, flags); } #endif /* CONFIG_NFSV4 */ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int open_flags, umode_t mode) { struct dentry *res = NULL; /* Same as look+open from lookup_open(), but with different O_TRUNC * handling. */ int error = 0; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) return -ENAMETOOLONG; if (open_flags & O_CREAT) { error = nfs_do_create(dir, dentry, mode, open_flags); if (!error) { file->f_mode |= FMODE_CREATED; return finish_open(file, dentry, NULL); } else if (error != -EEXIST || open_flags & O_EXCL) return error; } if (d_in_lookup(dentry)) { /* The only flags nfs_lookup considers are * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and * we want those to be zero so the lookup isn't skipped. */ res = nfs_lookup(dir, dentry, 0); } return finish_no_open(file, res); } EXPORT_SYMBOL_GPL(nfs_atomic_open_v23); struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); struct inode *inode; struct dentry *d; int error; d_drop(dentry); if (fhandle->size == 0) { error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, fhandle, fattr); if (error) goto out_error; } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); if (error < 0) goto out_error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); d = d_splice_alias(inode, dentry); out: dput(parent); return d; out_error: d = ERR_PTR(error); goto out; } EXPORT_SYMBOL_GPL(nfs_add_or_obtain); /* * Code common to create, mkdir, and mknod. */ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct dentry *d; d = nfs_add_or_obtain(dentry, fhandle, fattr); if (IS_ERR(d)) return PTR_ERR(d); /* Callers don't care */ dput(d); return 0; } EXPORT_SYMBOL_GPL(nfs_instantiate); /* * Following a failed create operation, we drop the dentry rather * than retain a negative dentry. This avoids a problem in the event * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. 
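*
* Hence the pattern used by nfs_do_create() and nfs_mknod() below:
*
*	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
*	if (error != 0)
*		d_drop(dentry);	/* don't cache a possibly wrong negative */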
*/ static int nfs_do_create(struct inode *dir, struct dentry *dentry, umode_t mode, int open_flags) { struct iattr attr; int error; open_flags |= O_CREAT; dfprintk(VFS, "NFS: create(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; if (open_flags & O_TRUNC) { attr.ia_size = 0; attr.ia_valid |= ATTR_SIZE; } trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); trace_nfs_create_exit(dir, dentry, open_flags, error); if (error != 0) goto out_err; return 0; out_err: d_drop(dentry); return error; } int nfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0); } EXPORT_SYMBOL_GPL(nfs_create); /* * See comments for nfs_proc_create regarding failed operations. */ int nfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; int status; dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; trace_nfs_mknod_enter(dir, dentry); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); trace_nfs_mknod_exit(dir, dentry, status); if (status != 0) goto out_err; return 0; out_err: d_drop(dentry); return status; } EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. */ struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; struct dentry *ret; dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; trace_nfs_mkdir_enter(dir, dentry); ret = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); trace_nfs_mkdir_exit(dir, dentry, PTR_ERR_OR_ZERO(ret)); return ret; } EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { if (simple_positive(dentry)) d_delete(dentry); } static void nfs_dentry_remove_handle_error(struct inode *dir, struct dentry *dentry, int error) { switch (error) { case -ENOENT: if (d_really_is_positive(dentry)) d_delete(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case 0: nfs_d_prune_case_insensitive_aliases(d_inode(dentry)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } } int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { down_write(&NFS_I(d_inode(dentry))->rmdir_sem); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { case 0: clear_nlink(d_inode(dentry)); break; case -ENOENT: nfs_dentry_handle_enoent(dentry); } up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); nfs_dentry_remove_handle_error(dir, dentry, error); trace_nfs_rmdir_exit(dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_rmdir); /* * Remove a file after making sure there are no pending writes, * and after checking that the file has only one user. * * We invalidate the attribute cache and free the inode prior to the operation * to avoid possible races if the server reuses the inode. 
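*
* Whether the remove is "safe" is decided in nfs_unlink() below: a
* dentry that still has other users is silly-renamed instead, roughly
* (locking and the NFS_INO_PRESERVE_UNLINKED test elided):
*
*	if (d_count(dentry) > 1) {
*		write_inode_now(d_inode(dentry), 0);
*		error = nfs_sillyrename(dir, dentry);
*	}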
*/ static int nfs_safe_remove(struct dentry *dentry) { struct inode *dir = d_inode(dentry->d_parent); struct inode *inode = d_inode(dentry); int error = -EBUSY; dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry); /* If the dentry was sillyrenamed, we simply call d_delete() */ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { error = 0; goto out; } trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { error = NFS_PROTO(dir)->remove(dir, dentry); if (error == 0) nfs_drop_nlink(inode); } else error = NFS_PROTO(dir)->remove(dir, dentry); if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); trace_nfs_remove_exit(dir, dentry, error); out: return error; } /* We do silly rename. In case sillyrename() returns -EBUSY, the inode * belongs to an active ".nfs..." file and we return -EBUSY. * * If sillyrename() returns 0, we do nothing, otherwise we unlink. */ int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id, dir->i_ino, dentry); trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(d_inode(dentry))->flags)) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(d_inode(dentry), 0); error = nfs_sillyrename(dir, dentry); goto out; } /* We must prevent any concurrent open until the unlink * completes. ->d_revalidate will wait for ->d_fsdata * to clear. We set it here to ensure no lookup succeeds until * the unlink is complete on the server. */ error = -ETXTBSY; if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) { spin_unlock(&dentry->d_lock); goto out; } block_revalidate(dentry); spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); unblock_revalidate(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_unlink); /* * To create a symbolic link, most file systems instantiate a new inode, * add a page to it containing the path, then write it out to the disk * using prepare_write/commit_write. * * Unfortunately the NFS client can't create the in-core inode first * because it needs a file handle to create an in-core inode (see * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the * symlink request has completed on the server. * * So instead we allocate a raw page, copy the symname into it, then do * the SYMLINK request with the page as the buffer. If it succeeds, we * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. 
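*
* In outline (error handling elided; see nfs_symlink() below):
*
*	folio = folio_alloc(GFP_USER, 0);
*	memcpy(folio_address(folio), symname, pathlen);
*	error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr);
*	if (error == 0)
*		filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0,
*				  GFP_KERNEL);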
*/ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct folio *folio; char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); int error; dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry, symname); if (pathlen > PAGE_SIZE) return -ENAMETOOLONG; attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; folio = folio_alloc(GFP_USER, 0); if (!folio) return -ENOMEM; kaddr = folio_address(folio); memcpy(kaddr, symname, pathlen); if (pathlen < PAGE_SIZE) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); trace_nfs_symlink_enter(dir, dentry); error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, dentry, symname, error); d_drop(dentry); folio_put(folio); return error; } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); /* * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0, GFP_KERNEL) == 0) { folio_mark_uptodate(folio); folio_unlock(folio); } folio_put(folio); return 0; } EXPORT_SYMBOL_GPL(nfs_symlink); int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); int error; dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n", old_dentry, dentry); trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); if (S_ISREG(inode->i_mode)) nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); ihold(inode); d_add(dentry, inode); } trace_nfs_link_exit(inode, dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_link); static void nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) { struct dentry *new_dentry = data->new_dentry; unblock_revalidate(new_dentry); } static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, struct dentry *new_dentry) { struct nfs_server *server = NFS_SB(old_dentry->d_sb); if (old_dentry->d_parent != new_dentry->d_parent) return false; if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); return true; } /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a * different file handle for the same inode after a rename (e.g. when * moving to a different directory). A fail-safe method to do so would * be to look up old_dir/old_name, create a link to new_dir/new_name and * rename the old file using the sillyrename stuff. This way, the original * file in old_dir will go away when the last process iput()s the inode. * * FIXED. * * It actually works quite well. One needs to have the possibility for * at least one ".nfs..." file in each directory the file ever gets * moved or linked to which happens automagically with the new * implementation that only depends on the dcache stuff instead of * using the inode layer * * Unfortunately, things are a little more complicated than indicated * above. For a cross-directory move, we want to make sure we can get * rid of the old inode after the operation. This means there must be * no pending writes (if it's a file), and the use count must be 1. * If these conditions are met, we can drop the dentries before doing * the rename. 
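*
* Sketch of the busy-target handling in nfs_rename() below (locking
* elided): a busy non-directory target is silly-renamed out of the
* way and a freshly allocated copy of its name becomes the target:
*
*	if (new_inode && !S_ISDIR(new_inode->i_mode) &&
*	    d_count(new_dentry) > 2) {
*		dentry = d_alloc(new_dentry->d_parent, &new_dentry->d_name);
*		err = nfs_sillyrename(new_dir, new_dentry);
*		new_dentry = dentry;
*		new_inode = NULL;
*	}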
*/ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct dentry *dentry = NULL; struct rpc_task *task; bool must_unblock = false; int error = -EBUSY; if (flags) return -EINVAL; dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", old_dentry, new_dentry, d_count(new_dentry)); trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); /* * For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. If the * silly-rename succeeds, the copied dentry is hashed and becomes * the new target. */ if (new_inode && !S_ISDIR(new_inode->i_mode)) { /* We must prevent any concurrent open until the unlink * completes. ->d_revalidate will wait for ->d_fsdata * to clear. We set it here to ensure no lookup succeeds until * the unlink is complete on the server. */ error = -ETXTBSY; if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) goto out; spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { int err; spin_unlock(&new_dentry->d_lock); /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, &new_dentry->d_name); if (!dentry) goto out; /* silly-rename the existing target ... */ err = nfs_sillyrename(new_dir, new_dentry); if (err) goto out; new_dentry = dentry; new_inode = NULL; } else { block_revalidate(new_dentry); must_unblock = true; spin_unlock(&new_dentry->d_lock); } } if (S_ISREG(old_inode->i_mode) && nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { if (must_unblock) unblock_revalidate(new_dentry); error = PTR_ERR(task); goto out; } error = rpc_wait_for_completion_task(task); if (error != 0) { ((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1; /* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */ smp_wmb(); } else error = task->tk_status; rpc_put_task(task); /* Ensure the inode attributes are revalidated */ if (error == 0) { spin_lock(&old_inode->i_lock); NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter(); nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_REVAL_FORCED); spin_unlock(&old_inode->i_lock); } out: trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); if (!error) { if (new_inode != NULL) nfs_drop_nlink(new_inode); /* * The d_move() should be here instead of in an async RPC completion * handler because we need the proper locks to move the dentry. If * we're interrupted by a signal, the async RPC completion handler * should mark the directories for revalidation. */ d_move(old_dentry, new_dentry); nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); /* new dentry created? 
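* (i.e. did we allocate a silly-rename replacement for a busy target
* above? If so, drop our reference now that d_move() is done.)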
*/ if (dentry) dput(dentry); return error; } EXPORT_SYMBOL_GPL(nfs_rename); static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); static atomic_long_t nfs_access_nr_entries; static unsigned long nfs_access_max_cachesize = 4*1024*1024; module_param(nfs_access_max_cachesize, ulong, 0644); MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length"); static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_group_info(entry->group_info); kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); smp_mb__after_atomic(); } static void nfs_access_free_list(struct list_head *head) { struct nfs_access_entry *cache; while (!list_empty(head)) { cache = list_entry(head->next, struct nfs_access_entry, lru); list_del(&cache->lru); nfs_access_free_entry(cache); } } static unsigned long nfs_do_access_cache_scan(unsigned int nr_to_scan) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; long freed = 0; spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; if (nr_to_scan-- == 0) break; inode = &nfsi->vfs_inode; spin_lock(&inode->i_lock); if (list_empty(&nfsi->access_cache_entry_lru)) goto remove_lru_entry; cache = list_entry(nfsi->access_cache_entry_lru.next, struct nfs_access_entry, lru); list_move(&cache->lru, &head); rb_erase(&cache->rb_node, &nfsi->access_cache); freed++; if (!list_empty(&nfsi->access_cache_entry_lru)) list_move_tail(&nfsi->access_cache_inode_lru, &nfs_access_lru_list); else { remove_lru_entry: list_del_init(&nfsi->access_cache_inode_lru); smp_mb__before_atomic(); clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); smp_mb__after_atomic(); } spin_unlock(&inode->i_lock); } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); return freed; } unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return SHRINK_STOP; return nfs_do_access_cache_scan(nr_to_scan); } unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } static void nfs_access_cache_enforce_limit(void) { long nr_entries = atomic_long_read(&nfs_access_nr_entries); unsigned long diff; unsigned int nr_to_scan; if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize) return; nr_to_scan = 100; diff = nr_entries - nfs_access_max_cachesize; if (diff < nr_to_scan) nr_to_scan = diff; nfs_do_access_cache_scan(nr_to_scan); } static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) { struct rb_root *root_node = &nfsi->access_cache; struct rb_node *n; struct nfs_access_entry *entry; /* Unhook entries from the cache */ while ((n = rb_first(root_node)) != NULL) { entry = rb_entry(n, struct nfs_access_entry, rb_node); rb_erase(n, root_node); list_move(&entry->lru, head); } nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; } void nfs_access_zap_cache(struct inode *inode) { LIST_HEAD(head); if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0) return; /* Remove from global LRU init */ spin_lock(&nfs_access_lru_lock); if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) list_del_init(&NFS_I(inode)->access_cache_inode_lru); spin_lock(&inode->i_lock); __nfs_access_zap_cache(NFS_I(inode), &head); spin_unlock(&inode->i_lock); 
spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); } EXPORT_SYMBOL_GPL(nfs_access_zap_cache); static int access_cmp(const struct cred *a, const struct nfs_access_entry *b) { struct group_info *ga, *gb; int g; if (uid_lt(a->fsuid, b->fsuid)) return -1; if (uid_gt(a->fsuid, b->fsuid)) return 1; if (gid_lt(a->fsgid, b->fsgid)) return -1; if (gid_gt(a->fsgid, b->fsgid)) return 1; ga = a->group_info; gb = b->group_info; if (ga == gb) return 0; if (ga == NULL) return -1; if (gb == NULL) return 1; if (ga->ngroups < gb->ngroups) return -1; if (ga->ngroups > gb->ngroups) return 1; for (g = 0; g < ga->ngroups; g++) { if (gid_lt(ga->gid[g], gb->gid[g])) return -1; if (gid_gt(ga->gid[g], gb->gid[g])) return 1; } return 0; } static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred) { struct rb_node *n = NFS_I(inode)->access_cache.rb_node; while (n != NULL) { struct nfs_access_entry *entry = rb_entry(n, struct nfs_access_entry, rb_node); int cmp = access_cmp(cred, entry); if (cmp < 0) n = n->rb_left; else if (cmp > 0) n = n->rb_right; else return entry; } return NULL; } static u64 nfs_access_login_time(const struct task_struct *task, const struct cred *cred) { const struct task_struct *parent; const struct cred *pcred; u64 ret; rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); pcred = __task_cred(parent); if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; } ret = task->start_time; rcu_read_unlock(); return ret; } static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; bool retry = true; int err; spin_lock(&inode->i_lock); for(;;) { if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) goto out_zap; cache = nfs_access_search_rbtree(inode, cred); err = -ENOENT; if (cache == NULL) goto out; /* Found an entry, is our attribute cache valid? */ if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; if (!retry) break; err = -ECHILD; if (!may_block) goto out; spin_unlock(&inode->i_lock); err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (err) return err; spin_lock(&inode->i_lock); retry = false; } err = -ENOENT; if ((s64)(login_time - cache->timestamp) > 0) goto out; *mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; out: spin_unlock(&inode->i_lock); return err; out_zap: spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); return -ENOENT; } static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask) { /* Only check the most recently returned cache entry, * but do it without locking. 
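*
* The tail of access_cache_entry_lru is the most recently used entry
* (hits are moved there with list_move_tail(), new entries added with
* list_add_tail()), so the lockless peek below is simply:
*
*	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
*	cache = list_entry(lh, struct nfs_access_entry, lru);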
*/ struct nfs_inode *nfsi = NFS_I(inode); u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; int err = -ECHILD; struct list_head *lh; rcu_read_lock(); if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) goto out; lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru)); cache = list_entry(lh, struct nfs_access_entry, lru); if (lh == &nfsi->access_cache_entry_lru || access_cmp(cred, cache) != 0) cache = NULL; if (cache == NULL) goto out; if ((s64)(login_time - cache->timestamp) > 0) goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; *mask = cache->mask; err = 0; out: rcu_read_unlock(); return err; } int nfs_access_get_cached(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) { int status; status = nfs_access_get_cached_rcu(inode, cred, mask); if (status != 0) status = nfs_access_get_cached_locked(inode, cred, mask, may_block); return status; } EXPORT_SYMBOL_GPL(nfs_access_get_cached); static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set, const struct cred *cred) { struct nfs_inode *nfsi = NFS_I(inode); struct rb_root *root_node = &nfsi->access_cache; struct rb_node **p = &root_node->rb_node; struct rb_node *parent = NULL; struct nfs_access_entry *entry; int cmp; spin_lock(&inode->i_lock); while (*p != NULL) { parent = *p; entry = rb_entry(parent, struct nfs_access_entry, rb_node); cmp = access_cmp(cred, entry); if (cmp < 0) p = &parent->rb_left; else if (cmp > 0) p = &parent->rb_right; else goto found; } rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); spin_unlock(&inode->i_lock); return; found: rb_replace_node(parent, &set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); list_del(&entry->lru); spin_unlock(&inode->i_lock); nfs_access_free_entry(entry); } void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set, const struct cred *cred) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) return; RB_CLEAR_NODE(&cache->rb_node); cache->fsuid = cred->fsuid; cache->fsgid = cred->fsgid; cache->group_info = get_group_info(cred->group_info); cache->mask = set->mask; cache->timestamp = ktime_get_ns(); /* The above field assignments must be visible * before this item appears on the lru. We cannot easily * use rcu_assign_pointer, so just force the memory barrier. 
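*
* The smp_wmb() below pairs with the rcu_dereference() in
* nfs_access_get_cached_rcu(): a reader that observes this entry on
* the lru list must also observe the mask, timestamp and credential
* fields already initialised.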
*/ smp_wmb(); nfs_access_add_rbtree(inode, cache, cred); /* Update accounting */ smp_mb__before_atomic(); atomic_long_inc(&nfs_access_nr_entries); smp_mb__after_atomic(); /* Add inode to global LRU list */ if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { spin_lock(&nfs_access_lru_lock); if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); spin_unlock(&nfs_access_lru_lock); } nfs_access_cache_enforce_limit(); } EXPORT_SYMBOL_GPL(nfs_access_add_cache); #define NFS_MAY_READ (NFS_ACCESS_READ) #define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \ NFS_ACCESS_EXTEND | \ NFS_ACCESS_DELETE) #define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \ NFS_ACCESS_EXTEND) #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE #define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP) #define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE) static int nfs_access_calc_mask(u32 access_result, umode_t umode) { int mask = 0; if (access_result & NFS_MAY_READ) mask |= MAY_READ; if (S_ISDIR(umode)) { if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE) mask |= MAY_WRITE; if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP) mask |= MAY_EXEC; } else if (S_ISREG(umode)) { if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE) mask |= MAY_WRITE; if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE) mask |= MAY_EXEC; } else if (access_result & NFS_MAY_WRITE) mask |= MAY_WRITE; return mask; } void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; int cache_mask = -1; int status; trace_nfs_access_enter(inode); status = nfs_access_get_cached(inode, cred, &cache.mask, may_block); if (status == 0) goto out_cached; status = -ECHILD; if (!may_block) goto out; /* * Determine which access bits we want to ask for... 
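*
* We ask for the full set of interesting bits in one go, so a single
* ACCESS round trip can satisfy later permission checks for the same
* credential straight from the cache.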
*/ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND | nfs_access_xattr_mask(NFS_SERVER(inode)); if (S_ISDIR(inode->i_mode)) cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; else cache.mask |= NFS_ACCESS_EXECUTE; status = NFS_PROTO(inode)->access(inode, &cache, cred); if (status != 0) { if (status == -ESTALE) { if (!S_ISDIR(inode->i_mode)) nfs_set_inode_stale(inode); else nfs_zap_caches(inode); } goto out; } nfs_access_add_cache(inode, &cache, cred); out_cached: cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, mask, cache_mask, status); return status; } static int nfs_open_permission_mask(int openflags) { int mask = 0; if (openflags & __FMODE_EXEC) { /* ONLY check exec rights */ mask = MAY_EXEC; } else { if ((openflags & O_ACCMODE) != O_WRONLY) mask |= MAY_READ; if ((openflags & O_ACCMODE) != O_RDONLY) mask |= MAY_WRITE; } return mask; } int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } EXPORT_SYMBOL_GPL(nfs_may_open); static int nfs_execute_ok(struct inode *inode, int mask) { struct nfs_server *server = NFS_SERVER(inode); int ret = 0; if (S_ISDIR(inode->i_mode)) return 0; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) { if (mask & MAY_NOT_BLOCK) return -ECHILD; ret = __nfs_revalidate_inode(server, inode); } if (ret == 0 && !execute_ok(inode)) ret = -EACCES; return ret; } int nfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { const struct cred *cred = current_cred(); int res = 0; nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) goto out; /* Is this sys_access() ? */ if (mask & (MAY_ACCESS | MAY_CHDIR)) goto force_lookup; switch (inode->i_mode & S_IFMT) { case S_IFLNK: goto out; case S_IFREG: if ((mask & MAY_OPEN) && nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)) return 0; break; case S_IFDIR: /* * Optimize away all write operations, since the server * will check permissions when we perform the op. */ if ((mask & MAY_WRITE) && !(mask & MAY_READ)) goto out; } force_lookup: if (!NFS_PROTO(inode)->access) goto out_notsup; res = nfs_do_access(inode, cred, mask); out: if (!res && (mask & MAY_EXEC)) res = nfs_execute_ok(inode, mask); dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: if (mask & MAY_NOT_BLOCK) return -ECHILD; res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER); if (res == 0) res = generic_permission(&nop_mnt_idmap, inode, mask); goto out; } EXPORT_SYMBOL_GPL(nfs_permission); |
// SPDX-License-Identifier: GPL-2.0-only
/*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/hid-debug.h> #include <linux/fb.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/module.h> #include <linux/uaccess.h> #include "hid-picolcd.h" static int picolcd_debug_reset_show(struct seq_file *f, void *p) { if (picolcd_fbinfo((struct picolcd_data *)f->private)) seq_printf(f, "all fb\n"); else seq_printf(f, "all\n"); return 0; } static int picolcd_debug_reset_open(struct inode *inode, struct file *f) { return single_open(f, picolcd_debug_reset_show, inode->i_private); } static ssize_t picolcd_debug_reset_write(struct file *f, const char __user *user_buf, size_t count, loff_t *ppos) { struct picolcd_data *data = ((struct seq_file *)f->private_data)->private; char buf[32]; size_t cnt = min(count, sizeof(buf)-1); if (copy_from_user(buf, user_buf, cnt)) return -EFAULT; while (cnt > 0 && (buf[cnt-1] == ' ' || buf[cnt-1] == '\n')) cnt--; buf[cnt] = '\0'; if (strcmp(buf, "all") == 0) { picolcd_reset(data->hdev); picolcd_fb_reset(data, 1); } else if (strcmp(buf, "fb") == 0) { picolcd_fb_reset(data, 1); } else { return -EINVAL; } return count; } static const struct file_operations picolcd_debug_reset_fops = { .owner = THIS_MODULE, .open = picolcd_debug_reset_open, .read = seq_read, .llseek = seq_lseek, .write = picolcd_debug_reset_write, .release = single_release, }; /* * The "eeprom" file */ static ssize_t picolcd_debug_eeprom_read(struct file *f, char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; struct picolcd_pending *resp; u8 raw_data[3]; ssize_t ret = -EIO; if (s == 0) return -EINVAL; if (*off > 0x0ff) return 0; /* prepare buffer with info about what we want to read (addr & len) */ raw_data[0] = *off & 0xff; raw_data[1] = (*off >> 8) & 0xff; raw_data[2] = s < 20 ? 
s : 20; if (*off + raw_data[2] > 0xff) raw_data[2] = 0x100 - *off; resp = picolcd_send_and_wait(data->hdev, REPORT_EE_READ, raw_data, sizeof(raw_data)); if (!resp) return -EIO; if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { /* successful read :) */ ret = resp->raw_data[2]; if (ret > s) ret = s; if (copy_to_user(u, resp->raw_data+3, ret)) ret = -EFAULT; else *off += ret; } /* anything else is some kind of IO error */ kfree(resp); return ret; } static ssize_t picolcd_debug_eeprom_write(struct file *f, const char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; struct picolcd_pending *resp; ssize_t ret = -EIO; u8 raw_data[23]; if (s == 0) return -EINVAL; if (*off > 0x0ff) return -ENOSPC; memset(raw_data, 0, sizeof(raw_data)); raw_data[0] = *off & 0xff; raw_data[1] = (*off >> 8) & 0xff; raw_data[2] = min_t(size_t, 20, s); if (*off + raw_data[2] > 0xff) raw_data[2] = 0x100 - *off; if (copy_from_user(raw_data+3, u, min((u8)20, raw_data[2]))) return -EFAULT; resp = picolcd_send_and_wait(data->hdev, REPORT_EE_WRITE, raw_data, sizeof(raw_data)); if (!resp) return -EIO; if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { /* check if written data matches */ if (memcmp(raw_data, resp->raw_data, 3+raw_data[2]) == 0) { *off += raw_data[2]; ret = raw_data[2]; } } kfree(resp); return ret; } /* * Notes: * - read/write happens in chunks of at most 20 bytes, it's up to userspace * to loop in order to get more data. * - on write errors on otherwise correct write request the bytes * that should have been written are in undefined state. */ static const struct file_operations picolcd_debug_eeprom_fops = { .owner = THIS_MODULE, .open = simple_open, .read = picolcd_debug_eeprom_read, .write = picolcd_debug_eeprom_write, .llseek = generic_file_llseek, }; /* * The "flash" file */ /* record a flash address to buf (bounds check to be done by caller) */ static int _picolcd_flash_setaddr(struct picolcd_data *data, u8 *buf, long off) { buf[0] = off & 0xff; buf[1] = (off >> 8) & 0xff; if (data->addr_sz == 3) buf[2] = (off >> 16) & 0xff; return data->addr_sz == 2 ? 2 : 3; } /* read a given size of data (bounds check to be done by caller) */ static ssize_t _picolcd_flash_read(struct picolcd_data *data, int report_id, char __user *u, size_t s, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[4]; ssize_t ret = 0; int len_off, err = -EIO; while (s > 0) { err = -EIO; len_off = _picolcd_flash_setaddr(data, raw_data, *off); raw_data[len_off] = s > 32 ? 32 : s; resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_READ_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off+1) != 0) goto skip; if (copy_to_user(u+ret, resp->raw_data+len_off+1, raw_data[len_off])) { err = -EFAULT; goto skip; } *off += raw_data[len_off]; s -= raw_data[len_off]; ret += raw_data[len_off]; err = 0; } skip: kfree(resp); if (err) return ret > 0 ? 
ret : err; } return ret; } static ssize_t picolcd_debug_flash_read(struct file *f, char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; if (s == 0) return -EINVAL; if (*off > 0x05fff) return 0; if (*off + s > 0x05fff) s = 0x06000 - *off; if (data->status & PICOLCD_BOOTLOADER) return _picolcd_flash_read(data, REPORT_BL_READ_MEMORY, u, s, off); else return _picolcd_flash_read(data, REPORT_READ_MEMORY, u, s, off); } /* erase block aligned to a 64-byte boundary */ static ssize_t _picolcd_flash_erase64(struct picolcd_data *data, int report_id, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[3]; int len_off; ssize_t ret = -EIO; if (*off & 0x3f) return -EINVAL; len_off = _picolcd_flash_setaddr(data, raw_data, *off); resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_ERASE_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off) != 0) goto skip; ret = 0; } skip: kfree(resp); return ret; } /* write a given size of data (bounds check to be done by caller) */ static ssize_t _picolcd_flash_write(struct picolcd_data *data, int report_id, const char __user *u, size_t s, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[36]; ssize_t ret = 0; int len_off, err = -EIO; while (s > 0) { err = -EIO; len_off = _picolcd_flash_setaddr(data, raw_data, *off); raw_data[len_off] = s > 32 ? 32 : s; if (copy_from_user(raw_data+len_off+1, u, raw_data[len_off])) { err = -EFAULT; break; } resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1+raw_data[len_off]); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_WRITE_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off+1+raw_data[len_off]) != 0) goto skip; *off += raw_data[len_off]; s -= raw_data[len_off]; ret += raw_data[len_off]; err = 0; } skip: kfree(resp); if (err) break; } return ret > 0 ? ret : err; } static ssize_t picolcd_debug_flash_write(struct file *f, const char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; ssize_t err, ret = 0; int report_erase, report_write; if (s == 0) return -EINVAL; if (*off > 0x5fff) return -ENOSPC; if (s & 0x3f) return -EINVAL; if (*off & 0x3f) return -EINVAL; if (data->status & PICOLCD_BOOTLOADER) { report_erase = REPORT_BL_ERASE_MEMORY; report_write = REPORT_BL_WRITE_MEMORY; } else { report_erase = REPORT_ERASE_MEMORY; report_write = REPORT_WRITE_MEMORY; } mutex_lock(&data->mutex_flash); while (s > 0) { err = _picolcd_flash_erase64(data, report_erase, off); if (err) break; err = _picolcd_flash_write(data, report_write, u, 64, off); if (err < 0) break; ret += err; *off += err; s -= err; if (err != 64) break; } mutex_unlock(&data->mutex_flash); return ret > 0 ? ret : err; } /* * Notes: * - concurrent writing is prevented by mutex and all writes must be * n*64 bytes and 64-byte aligned, each write being preceded by an * ERASE which erases a 64-byte block.
* If less than requested was written, or an error is returned for an * otherwise correct write request, the next 64-byte block which should * have been written is in undefined state (mostly: original, erased, * (half-)written with write error) * - reading can happen without special restriction */ static const struct file_operations picolcd_debug_flash_fops = { .owner = THIS_MODULE, .open = simple_open, .read = picolcd_debug_flash_read, .write = picolcd_debug_flash_write, .llseek = generic_file_llseek, }; /* * Helper code for HID report level dumping/debugging */ static const char * const error_codes[] = { "success", "parameter missing", "data_missing", "block readonly", "block not erasable", "block too big", "section overflow", "invalid command length", "invalid data length", }; static void dump_buff_as_hex(char *dst, size_t dst_sz, const u8 *data, const size_t data_len) { int i, j; for (i = j = 0; i < data_len && j + 4 < dst_sz; i++) { dst[j++] = hex_asc[(data[i] >> 4) & 0x0f]; dst[j++] = hex_asc[data[i] & 0x0f]; dst[j++] = ' '; } dst[j] = '\0'; if (j > 0) dst[j-1] = '\n'; if (i < data_len && j > 2) dst[j-2] = dst[j-3] = '.'; } void picolcd_debug_out_report(struct picolcd_data *data, struct hid_device *hdev, struct hid_report *report) { u8 *raw_data; int raw_size = (report->size >> 3) + 1; char *buff; #define BUFF_SZ 256 /* Avoid unnecessary overhead if debugfs is disabled */ if (list_empty(&hdev->debug_list)) return; buff = kmalloc(BUFF_SZ, GFP_ATOMIC); if (!buff) return; raw_data = hid_alloc_report_buf(report, GFP_ATOMIC); if (!raw_data) { kfree(buff); return; } snprintf(buff, BUFF_SZ, "\nout report %d (size %d) = ", report->id, raw_size); hid_debug_event(hdev, buff); raw_data[0] = report->id; hid_output_report(report, raw_data); dump_buff_as_hex(buff, BUFF_SZ, raw_data, raw_size); hid_debug_event(hdev, buff); switch (report->id) { case REPORT_LED_STATE: /* 1 data byte with GPO state */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LED_STATE", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tGPO state: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_BRIGHTNESS: /* 1 data byte with brightness */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_BRIGHTNESS", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tBrightness: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_CONTRAST: /* 1 data byte with contrast */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_CONTRAST", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tContrast: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_RESET: /* 2 data bytes with reset duration in ms */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_RESET", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tDuration: 0x%02x%02x (%dms)\n", raw_data[2], raw_data[1], raw_data[2] << 8 | raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_LCD_CMD: /* 63 data bytes with LCD commands */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_CMD", report->id, raw_size-1); hid_debug_event(hdev, buff); /* TODO: format decoding */ break; case REPORT_LCD_DATA: /* 63 data bytes with LCD data */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_DATA", report->id, raw_size-1); /* TODO: format decoding */ hid_debug_event(hdev, buff); break; case REPORT_LCD_CMD_DATA: /* 63 data bytes
with LCD commands and data */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_CMD_DATA", report->id, raw_size-1); /* TODO: format decoding */ hid_debug_event(hdev, buff); break; case REPORT_EE_READ: /* 3 data bytes with read area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EE_READ", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); break; case REPORT_EE_WRITE: /* 3+1..20 data bytes with write area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EE_WRITE", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } hid_debug_event(hdev, buff); break; case REPORT_ERASE_MEMORY: case REPORT_BL_ERASE_MEMORY: /* 3 data bytes with pointer inside erase block */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_ERASE_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x\n", raw_data[2], raw_data[1]); break; case 3: snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_READ_MEMORY: case REPORT_BL_READ_MEMORY: /* 4 data bytes with read area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_READ_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_WRITE_MEMORY: case REPORT_BL_WRITE_MEMORY: /* 4+1..32 data bytes with write area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_WRITE_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2],
raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); hid_debug_event(hdev, buff); if (raw_data[4] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[4] + 5 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_SPLASH_RESTART: /* TODO */ break; case REPORT_EXIT_KEYBOARD: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EXIT_KEYBOARD", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", raw_data[1] | (raw_data[2] << 8), raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_VERSION: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_VERSION", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_DEVID: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_DEVID", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_SPLASH_SIZE: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_SPLASH_SIZE", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_HOOK_VERSION: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_HOOK_VERSION", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_EXIT_FLASHER: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EXIT_FLASHER", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", raw_data[1] | (raw_data[2] << 8), raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; default: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "<unknown>", report->id, raw_size-1); hid_debug_event(hdev, buff); break; } wake_up_interruptible(&hdev->debug_wait); kfree(raw_data); kfree(buff); } void picolcd_debug_raw_event(struct picolcd_data *data, struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int size) { char *buff; #define BUFF_SZ 256 /* Avoid unnecessary overhead if debugfs is disabled */ if (list_empty(&hdev->debug_list)) return; buff = kmalloc(BUFF_SZ, GFP_ATOMIC); if (!buff) return; switch (report->id) { case REPORT_ERROR_CODE: /* 2 data bytes with affected report and error code */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_ERROR_CODE", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[2] < ARRAY_SIZE(error_codes)) snprintf(buff, BUFF_SZ, "\tError code 0x%02x (%s) in reply to report 0x%02x\n", raw_data[2], error_codes[raw_data[2]], raw_data[1]); else snprintf(buff, BUFF_SZ, "\tError code 0x%02x in reply to report 0x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_KEY_STATE: /* 2 data bytes with key state */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_KEY_STATE", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[1] == 0) snprintf(buff, BUFF_SZ, "\tNo key pressed\n"); else if (raw_data[2] == 0) snprintf(buff, BUFF_SZ, "\tOne key pressed: 0x%02x (%d)\n", raw_data[1], raw_data[1]); else snprintf(buff, BUFF_SZ, "\tTwo keys pressed: 0x%02x (%d), 0x%02x (%d)\n", raw_data[1], raw_data[1], raw_data[2], raw_data[2]); hid_debug_event(hdev, buff); break; case REPORT_IR_DATA: /* Up to 20 bytes of IR scancode data */ snprintf(buff, BUFF_SZ, "report
%s (%d, size=%d)\n", "REPORT_IR_DATA", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[1] == 0) { snprintf(buff, BUFF_SZ, "\tUnexpectedly 0 data length\n"); hid_debug_event(hdev, buff); } else if (raw_data[1] + 1 <= size) { snprintf(buff, BUFF_SZ, "\tData length: %d\n\tIR Data: ", raw_data[1]); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+2, raw_data[1]); hid_debug_event(hdev, buff); } else { snprintf(buff, BUFF_SZ, "\tOverflowing data length: %d\n", raw_data[1]-1); hid_debug_event(hdev, buff); } break; case REPORT_EE_DATA: /* Data buffer in response to REPORT_EE_READ or REPORT_EE_WRITE */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_EE_DATA", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); hid_debug_event(hdev, buff); } else if (raw_data[3] + 4 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); hid_debug_event(hdev, buff); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); hid_debug_event(hdev, buff); } break; case REPORT_MEMORY: /* Data buffer in response to REPORT_READ_MEMORY or REPORT_WRITE_MEMORY */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); hid_debug_event(hdev, buff); if (raw_data[4] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[4] + 5 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_VERSION: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_VERSION", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_BL_ERASE_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_ERASE_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_BL_READ_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_READ_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_BL_WRITE_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_WRITE_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_DEVID: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", 
"REPORT_DEVID", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tSerial: 0x%02x%02x%02x%02x\n", raw_data[1], raw_data[2], raw_data[3], raw_data[4]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tType: 0x%02x\n", raw_data[5]); hid_debug_event(hdev, buff); break; case REPORT_SPLASH_SIZE: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_SPLASH_SIZE", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tTotal splash space: %d\n", (raw_data[2] << 8) | raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tUsed splash space: %d\n", (raw_data[4] << 8) | raw_data[3]); hid_debug_event(hdev, buff); break; case REPORT_HOOK_VERSION: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_HOOK_VERSION", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", raw_data[1], raw_data[2]); hid_debug_event(hdev, buff); break; default: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "<unknown>", report->id, size-1); hid_debug_event(hdev, buff); break; } wake_up_interruptible(&hdev->debug_wait); kfree(buff); } void picolcd_init_devfs(struct picolcd_data *data, struct hid_report *eeprom_r, struct hid_report *eeprom_w, struct hid_report *flash_r, struct hid_report *flash_w, struct hid_report *reset) { struct hid_device *hdev = data->hdev; mutex_init(&data->mutex_flash); /* reset */ if (reset) data->debug_reset = debugfs_create_file("reset", 0600, hdev->debug_dir, data, &picolcd_debug_reset_fops); /* eeprom */ if (eeprom_r || eeprom_w) data->debug_eeprom = debugfs_create_file("eeprom", (eeprom_w ? S_IWUSR : 0) | (eeprom_r ? S_IRUSR : 0), hdev->debug_dir, data, &picolcd_debug_eeprom_fops); /* flash */ if (flash_r && flash_r->maxfield == 1 && flash_r->field[0]->report_size == 8) data->addr_sz = flash_r->field[0]->report_count - 1; else data->addr_sz = -1; if (data->addr_sz == 2 || data->addr_sz == 3) { data->debug_flash = debugfs_create_file("flash", (flash_w ? S_IWUSR : 0) | (flash_r ? S_IRUSR : 0), hdev->debug_dir, data, &picolcd_debug_flash_fops); } else if (flash_r || flash_w) hid_warn(hdev, "Unexpected FLASH access reports, please submit rdesc for review\n"); } void picolcd_exit_devfs(struct picolcd_data *data) { struct dentry *dent; dent = data->debug_reset; data->debug_reset = NULL; debugfs_remove(dent); dent = data->debug_eeprom; data->debug_eeprom = NULL; debugfs_remove(dent); dent = data->debug_flash; data->debug_flash = NULL; debugfs_remove(dent); mutex_destroy(&data->mutex_flash); } |
/* SPDX-License-Identifier: GPL-2.0 */ /* * NUMA memory policies for Linux. * Copyright 2003,2004 Andi Kleen SuSE Labs */ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 #include <linux/sched.h> #include <linux/mmzone.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/node.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <uapi/linux/mempolicy.h> struct mm_struct; #define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ #ifdef CONFIG_NUMA /* * Describe a memory policy. * * A mempolicy can be either associated with a process or with a VMA. * For VMA related allocations the VMA policy is preferred, otherwise * the process policy is used. Interrupts ignore the memory policy * of the current process. * * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. * * Freeing policy: * Mempolicy objects are reference counted. A mempolicy will be freed when * mpol_put() decrements the reference count to zero. * * Duplicating policy objects: * mpol_dup() allocates a new mempolicy and copies the specified mempolicy * to the new storage. The reference count of the new object is initialized * to 1, representing the caller of mpol_dup(). */ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/preferred/etc */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; }; /* * Support for managing mempolicy data objects (clone, copy, destroy) * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. */ extern void __mpol_put(struct mempolicy *pol); static inline void mpol_put(struct mempolicy *pol) { if (pol) __mpol_put(pol); } /* * Does mempolicy pol need explicit unref after use? * Currently only needed for shared policies.
*/ static inline int mpol_needs_cond_ref(struct mempolicy *pol) { return (pol && (pol->flags & MPOL_F_SHARED)); } static inline void mpol_cond_put(struct mempolicy *pol) { if (mpol_needs_cond_ref(pol)) __mpol_put(pol); } extern struct mempolicy *__mpol_dup(struct mempolicy *pol); static inline struct mempolicy *mpol_dup(struct mempolicy *pol) { if (pol) pol = __mpol_dup(pol); return pol; } static inline void mpol_get(struct mempolicy *pol) { if (pol) atomic_inc(&pol->refcnt); } extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b); static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (a == b) return true; return __mpol_equal(a, b); } /* * Tree of shared policies for a shared memory region. */ struct shared_policy { struct rb_root root; rwlock_t lock; }; struct sp_node { struct rb_node nd; pgoff_t start, end; struct mempolicy *policy; }; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *mpol); void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags); #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long addr); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); } extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); extern int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords); #else struct mempolicy {}; static inline struct mempolicy *get_task_policy(struct task_struct *p) { return NULL; } static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } static inline void mpol_put(struct mempolicy *pol) { } static inline void mpol_cond_put(struct mempolicy *pol) { } static inline void mpol_get(struct mempolicy *pol) { } struct shared_policy {}; static inline void mpol_shared_policy_init(struct shared_policy *sp, struct 
mempolicy *mpol) { } static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { *ilx = 0; return NULL; } static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } static inline void numa_policy_init(void) { } static inline void numa_default_policy(void) { } static inline void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { } static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; } static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return 0; } static inline void check_highest_zone(int k) { } #ifdef CONFIG_TMPFS static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif static inline int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long address) { return -1; /* no node preference */ } static inline void mpol_put_task_policy(struct task_struct *task) { } static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; } #endif /* CONFIG_NUMA */ #endif
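/*
 * Editor's usage sketch (not part of this header): the reference-counting
 * discipline described above.  mpol_dup() returns a copy with its refcnt
 * initialized to 1 on behalf of the caller, which must eventually drop that
 * reference with mpol_put(); policies obtained from a shared region carry
 * MPOL_F_SHARED and are dropped with mpol_cond_put() instead.  Hypothetical
 * caller, guarded out of any real build.
 */
#if 0
static void mempolicy_refcount_example(struct task_struct *p)
{
	struct mempolicy *pol = get_task_policy(p);
	struct mempolicy *copy = mpol_dup(pol);	/* we own one reference */

	if (IS_ERR_OR_NULL(copy))
		return;

	/* ... use 'copy' for allocations ... */

	mpol_put(copy);		/* refcnt hits 0, __mpol_put() frees it */
}
#endif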
// SPDX-License-Identifier: GPL-2.0-only /* * Edirol UA-101/UA-1000 driver * Copyright (c) Clemens Ladisch <clemens@ladisch.de> */ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "../usbaudio.h" #include "../midi.h" MODULE_DESCRIPTION("Edirol UA-101/1000 driver"); MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>"); MODULE_LICENSE("GPL v2"); /* * Should not be lower than the minimum scheduling delay of the host * controller. Some Intel controllers need more than one frame; as long as * that driver doesn't tell us about this, use 1.5 frames just to be sure. */ #define MIN_QUEUE_LENGTH 12 /* Somewhat random. */ #define MAX_QUEUE_LENGTH 30 /* * This magic value optimizes memory usage efficiency for the UA-101's packet * sizes at all sample rates, taking into account the stupid cache pool sizes * that usb_alloc_coherent() uses.
*/ #define DEFAULT_QUEUE_LENGTH 21 #define MAX_PACKET_SIZE 672 /* hardware specific */ #define MAX_MEMORY_BUFFERS DIV_ROUND_UP(MAX_QUEUE_LENGTH, \ PAGE_SIZE / MAX_PACKET_SIZE) static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; static unsigned int queue_length = 21; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "card index"); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string"); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "enable card"); module_param(queue_length, uint, 0644); MODULE_PARM_DESC(queue_length, "USB queue length in microframes, " __stringify(MIN_QUEUE_LENGTH)"-"__stringify(MAX_QUEUE_LENGTH)); enum { INTF_PLAYBACK, INTF_CAPTURE, INTF_MIDI, INTF_COUNT }; /* bits in struct ua101::states */ enum { USB_CAPTURE_RUNNING, USB_PLAYBACK_RUNNING, ALSA_CAPTURE_OPEN, ALSA_PLAYBACK_OPEN, ALSA_CAPTURE_RUNNING, ALSA_PLAYBACK_RUNNING, CAPTURE_URB_COMPLETED, PLAYBACK_URB_COMPLETED, DISCONNECTED, }; struct ua101 { struct usb_device *dev; struct snd_card *card; struct usb_interface *intf[INTF_COUNT]; int card_index; struct snd_pcm *pcm; struct list_head midi_list; u64 format_bit; unsigned int rate; unsigned int packets_per_second; spinlock_t lock; struct mutex mutex; unsigned long states; /* FIFO to synchronize playback rate to capture rate */ unsigned int rate_feedback_start; unsigned int rate_feedback_count; u8 rate_feedback[MAX_QUEUE_LENGTH]; struct list_head ready_playback_urbs; struct work_struct playback_work; wait_queue_head_t alsa_capture_wait; wait_queue_head_t rate_feedback_wait; wait_queue_head_t alsa_playback_wait; struct ua101_stream { struct snd_pcm_substream *substream; unsigned int usb_pipe; unsigned int channels; unsigned int frame_bytes; unsigned int max_packet_bytes; unsigned int period_pos; unsigned int buffer_pos; unsigned int queue_length; struct ua101_urb { struct urb urb; struct usb_iso_packet_descriptor iso_frame_desc[1]; struct list_head ready_list; } *urbs[MAX_QUEUE_LENGTH]; struct { unsigned int size; void *addr; dma_addr_t dma; } buffers[MAX_MEMORY_BUFFERS]; } capture, playback; }; static DEFINE_MUTEX(devices_mutex); static unsigned int devices_used; static struct usb_driver ua101_driver; static void abort_alsa_playback(struct ua101 *ua); static void abort_alsa_capture(struct ua101 *ua); static const char *usb_error_string(int err) { switch (err) { case -ENODEV: return "no device"; case -ENOENT: return "endpoint not enabled"; case -EPIPE: return "endpoint stalled"; case -ENOSPC: return "not enough bandwidth"; case -ESHUTDOWN: return "device disabled"; case -EHOSTUNREACH: return "device suspended"; case -EINVAL: case -EAGAIN: case -EFBIG: case -EMSGSIZE: return "internal error"; default: return "unknown error"; } } static void abort_usb_capture(struct ua101 *ua) { if (test_and_clear_bit(USB_CAPTURE_RUNNING, &ua->states)) { wake_up(&ua->alsa_capture_wait); wake_up(&ua->rate_feedback_wait); } } static void abort_usb_playback(struct ua101 *ua) { if (test_and_clear_bit(USB_PLAYBACK_RUNNING, &ua->states)) wake_up(&ua->alsa_playback_wait); } static void playback_urb_complete(struct urb *usb_urb) { struct ua101_urb *urb = (struct ua101_urb *)usb_urb; struct ua101 *ua = urb->urb.context; if (unlikely(urb->urb.status == -ENOENT || /* unlinked */ urb->urb.status == -ENODEV || /* device removed */ urb->urb.status == -ECONNRESET || /* unlinked */ urb->urb.status == -ESHUTDOWN)) { /* device disabled */ 
abort_usb_playback(ua); abort_alsa_playback(ua); return; } if (test_bit(USB_PLAYBACK_RUNNING, &ua->states)) { /* append URB to FIFO */ guard(spinlock_irqsave)(&ua->lock); list_add_tail(&urb->ready_list, &ua->ready_playback_urbs); if (ua->rate_feedback_count > 0) queue_work(system_highpri_wq, &ua->playback_work); ua->playback.substream->runtime->delay -= urb->urb.iso_frame_desc[0].length / ua->playback.frame_bytes; } } static void first_playback_urb_complete(struct urb *urb) { struct ua101 *ua = urb->context; urb->complete = playback_urb_complete; playback_urb_complete(urb); set_bit(PLAYBACK_URB_COMPLETED, &ua->states); wake_up(&ua->alsa_playback_wait); } /* copy data from the ALSA ring buffer into the URB buffer */ static bool copy_playback_data(struct ua101_stream *stream, struct urb *urb, unsigned int frames) { struct snd_pcm_runtime *runtime; unsigned int frame_bytes, frames1; const u8 *source; runtime = stream->substream->runtime; frame_bytes = stream->frame_bytes; source = runtime->dma_area + stream->buffer_pos * frame_bytes; if (stream->buffer_pos + frames <= runtime->buffer_size) { memcpy(urb->transfer_buffer, source, frames * frame_bytes); } else { /* wrap around at end of ring buffer */ frames1 = runtime->buffer_size - stream->buffer_pos; memcpy(urb->transfer_buffer, source, frames1 * frame_bytes); memcpy(urb->transfer_buffer + frames1 * frame_bytes, runtime->dma_area, (frames - frames1) * frame_bytes); } stream->buffer_pos += frames; if (stream->buffer_pos >= runtime->buffer_size) stream->buffer_pos -= runtime->buffer_size; stream->period_pos += frames; if (stream->period_pos >= runtime->period_size) { stream->period_pos -= runtime->period_size; return true; } return false; } static inline void add_with_wraparound(struct ua101 *ua, unsigned int *value, unsigned int add) { *value += add; if (*value >= ua->playback.queue_length) *value -= ua->playback.queue_length; } static void playback_work(struct work_struct *work) { struct ua101 *ua = container_of(work, struct ua101, playback_work); unsigned int frames; struct ua101_urb *urb; bool do_period_elapsed = false; int err; if (unlikely(!test_bit(USB_PLAYBACK_RUNNING, &ua->states))) return; /* * Synchronizing the playback rate to the capture rate is done by using * the same sequence of packet sizes for both streams. * Submitting a playback URB therefore requires both a ready URB and * the size of the corresponding capture packet, i.e., both playback * and capture URBs must have been completed. Since the USB core does * not guarantee that playback and capture complete callbacks are * called alternately, we use two FIFOs for packet sizes and read URBs; * submitting playback URBs is possible as long as both FIFOs are * nonempty. */ scoped_guard(spinlock_irqsave, &ua->lock) { while (ua->rate_feedback_count > 0 && !list_empty(&ua->ready_playback_urbs)) { /* take packet size out of FIFO */ frames = ua->rate_feedback[ua->rate_feedback_start]; add_with_wraparound(ua, &ua->rate_feedback_start, 1); ua->rate_feedback_count--; /* take URB out of FIFO */ urb = list_first_entry(&ua->ready_playback_urbs, struct ua101_urb, ready_list); list_del(&urb->ready_list); /* fill packet with data or silence */ urb->urb.iso_frame_desc[0].length = frames * ua->playback.frame_bytes; if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) do_period_elapsed |= copy_playback_data(&ua->playback, &urb->urb, frames); else memset(urb->urb.transfer_buffer, 0, urb->urb.iso_frame_desc[0].length); /* and off you go ... 
*/ err = usb_submit_urb(&urb->urb, GFP_ATOMIC); if (unlikely(err < 0)) { abort_usb_playback(ua); abort_alsa_playback(ua); dev_err(&ua->dev->dev, "USB request error %d: %s\n", err, usb_error_string(err)); return; } ua->playback.substream->runtime->delay += frames; } } if (do_period_elapsed) snd_pcm_period_elapsed(ua->playback.substream); } /* copy data from the URB buffer into the ALSA ring buffer */ static bool copy_capture_data(struct ua101_stream *stream, struct urb *urb, unsigned int frames) { struct snd_pcm_runtime *runtime; unsigned int frame_bytes, frames1; u8 *dest; runtime = stream->substream->runtime; frame_bytes = stream->frame_bytes; dest = runtime->dma_area + stream->buffer_pos * frame_bytes; if (stream->buffer_pos + frames <= runtime->buffer_size) { memcpy(dest, urb->transfer_buffer, frames * frame_bytes); } else { /* wrap around at end of ring buffer */ frames1 = runtime->buffer_size - stream->buffer_pos; memcpy(dest, urb->transfer_buffer, frames1 * frame_bytes); memcpy(runtime->dma_area, urb->transfer_buffer + frames1 * frame_bytes, (frames - frames1) * frame_bytes); } stream->buffer_pos += frames; if (stream->buffer_pos >= runtime->buffer_size) stream->buffer_pos -= runtime->buffer_size; stream->period_pos += frames; if (stream->period_pos >= runtime->period_size) { stream->period_pos -= runtime->period_size; return true; } return false; } static void capture_urb_complete(struct urb *urb) { struct ua101 *ua = urb->context; struct ua101_stream *stream = &ua->capture; unsigned int frames, write_ptr; bool do_period_elapsed; int err; if (unlikely(urb->status == -ENOENT || /* unlinked */ urb->status == -ENODEV || /* device removed */ urb->status == -ECONNRESET || /* unlinked */ urb->status == -ESHUTDOWN)) /* device disabled */ goto stream_stopped; if (urb->status >= 0 && urb->iso_frame_desc[0].status >= 0) frames = urb->iso_frame_desc[0].actual_length / stream->frame_bytes; else frames = 0; scoped_guard(spinlock_irqsave, &ua->lock) { if (frames > 0 && test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) do_period_elapsed = copy_capture_data(stream, urb, frames); else do_period_elapsed = false; if (test_bit(USB_CAPTURE_RUNNING, &ua->states)) { err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err < 0)) { dev_err(&ua->dev->dev, "USB request error %d: %s\n", err, usb_error_string(err)); goto stream_stopped; } /* append packet size to FIFO */ write_ptr = ua->rate_feedback_start; add_with_wraparound(ua, &write_ptr, ua->rate_feedback_count); ua->rate_feedback[write_ptr] = frames; if (ua->rate_feedback_count < ua->playback.queue_length) { ua->rate_feedback_count++; if (ua->rate_feedback_count == ua->playback.queue_length) wake_up(&ua->rate_feedback_wait); } else { /* * Ring buffer overflow; this happens when the playback * stream is not running. Throw away the oldest entry, * so that the playback stream, when it starts, sees * the most recent packet sizes. 
*/ add_with_wraparound(ua, &ua->rate_feedback_start, 1); } if (test_bit(USB_PLAYBACK_RUNNING, &ua->states) && !list_empty(&ua->ready_playback_urbs)) queue_work(system_highpri_wq, &ua->playback_work); } } if (do_period_elapsed) snd_pcm_period_elapsed(stream->substream); return; stream_stopped: abort_usb_playback(ua); abort_usb_capture(ua); abort_alsa_playback(ua); abort_alsa_capture(ua); } static void first_capture_urb_complete(struct urb *urb) { struct ua101 *ua = urb->context; urb->complete = capture_urb_complete; capture_urb_complete(urb); set_bit(CAPTURE_URB_COMPLETED, &ua->states); wake_up(&ua->alsa_capture_wait); } static int submit_stream_urbs(struct ua101 *ua, struct ua101_stream *stream) { unsigned int i; for (i = 0; i < stream->queue_length; ++i) { int err = usb_submit_urb(&stream->urbs[i]->urb, GFP_KERNEL); if (err < 0) { dev_err(&ua->dev->dev, "USB request error %d: %s\n", err, usb_error_string(err)); return err; } } return 0; } static void kill_stream_urbs(struct ua101_stream *stream) { unsigned int i; for (i = 0; i < stream->queue_length; ++i) if (stream->urbs[i]) usb_kill_urb(&stream->urbs[i]->urb); } static int enable_iso_interface(struct ua101 *ua, unsigned int intf_index) { struct usb_host_interface *alts; alts = ua->intf[intf_index]->cur_altsetting; if (alts->desc.bAlternateSetting != 1) { int err = usb_set_interface(ua->dev, alts->desc.bInterfaceNumber, 1); if (err < 0) { dev_err(&ua->dev->dev, "cannot initialize interface; error %d: %s\n", err, usb_error_string(err)); return err; } } return 0; } static void disable_iso_interface(struct ua101 *ua, unsigned int intf_index) { struct usb_host_interface *alts; if (!ua->intf[intf_index]) return; alts = ua->intf[intf_index]->cur_altsetting; if (alts->desc.bAlternateSetting != 0) { int err = usb_set_interface(ua->dev, alts->desc.bInterfaceNumber, 0); if (err < 0 && !test_bit(DISCONNECTED, &ua->states)) dev_warn(&ua->dev->dev, "interface reset failed; error %d: %s\n", err, usb_error_string(err)); } } static void stop_usb_capture(struct ua101 *ua) { clear_bit(USB_CAPTURE_RUNNING, &ua->states); kill_stream_urbs(&ua->capture); disable_iso_interface(ua, INTF_CAPTURE); } static int start_usb_capture(struct ua101 *ua) { int err; if (test_bit(DISCONNECTED, &ua->states)) return -ENODEV; if (test_bit(USB_CAPTURE_RUNNING, &ua->states)) return 0; kill_stream_urbs(&ua->capture); err = enable_iso_interface(ua, INTF_CAPTURE); if (err < 0) return err; clear_bit(CAPTURE_URB_COMPLETED, &ua->states); ua->capture.urbs[0]->urb.complete = first_capture_urb_complete; ua->rate_feedback_start = 0; ua->rate_feedback_count = 0; set_bit(USB_CAPTURE_RUNNING, &ua->states); err = submit_stream_urbs(ua, &ua->capture); if (err < 0) stop_usb_capture(ua); return err; } static void stop_usb_playback(struct ua101 *ua) { clear_bit(USB_PLAYBACK_RUNNING, &ua->states); kill_stream_urbs(&ua->playback); cancel_work_sync(&ua->playback_work); disable_iso_interface(ua, INTF_PLAYBACK); } static int start_usb_playback(struct ua101 *ua) { unsigned int i, frames; struct urb *urb; int err = 0; if (test_bit(DISCONNECTED, &ua->states)) return -ENODEV; if (test_bit(USB_PLAYBACK_RUNNING, &ua->states)) return 0; kill_stream_urbs(&ua->playback); cancel_work_sync(&ua->playback_work); err = enable_iso_interface(ua, INTF_PLAYBACK); if (err < 0) return err; clear_bit(PLAYBACK_URB_COMPLETED, &ua->states); ua->playback.urbs[0]->urb.complete = first_playback_urb_complete; scoped_guard(spinlock_irq, &ua->lock) { INIT_LIST_HEAD(&ua->ready_playback_urbs); } /* * We submit the initial URBs all 
at once, so we have to wait for the * packet size FIFO to be full. */ wait_event(ua->rate_feedback_wait, ua->rate_feedback_count >= ua->playback.queue_length || !test_bit(USB_CAPTURE_RUNNING, &ua->states) || test_bit(DISCONNECTED, &ua->states)); if (test_bit(DISCONNECTED, &ua->states)) { stop_usb_playback(ua); return -ENODEV; } if (!test_bit(USB_CAPTURE_RUNNING, &ua->states)) { stop_usb_playback(ua); return -EIO; } for (i = 0; i < ua->playback.queue_length; ++i) { /* all initial URBs contain silence */ scoped_guard(spinlock_irq, &ua->lock) { frames = ua->rate_feedback[ua->rate_feedback_start]; add_with_wraparound(ua, &ua->rate_feedback_start, 1); ua->rate_feedback_count--; } urb = &ua->playback.urbs[i]->urb; urb->iso_frame_desc[0].length = frames * ua->playback.frame_bytes; memset(urb->transfer_buffer, 0, urb->iso_frame_desc[0].length); } set_bit(USB_PLAYBACK_RUNNING, &ua->states); err = submit_stream_urbs(ua, &ua->playback); if (err < 0) stop_usb_playback(ua); return err; } static void abort_alsa_capture(struct ua101 *ua) { if (test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) snd_pcm_stop_xrun(ua->capture.substream); } static void abort_alsa_playback(struct ua101 *ua) { if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) snd_pcm_stop_xrun(ua->playback.substream); } static int set_stream_hw(struct ua101 *ua, struct snd_pcm_substream *substream, unsigned int channels) { int err; substream->runtime->hw.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_FIFO_IN_FRAMES; substream->runtime->hw.formats = ua->format_bit; substream->runtime->hw.rates = snd_pcm_rate_to_rate_bit(ua->rate); substream->runtime->hw.rate_min = ua->rate; substream->runtime->hw.rate_max = ua->rate; substream->runtime->hw.channels_min = channels; substream->runtime->hw.channels_max = channels; substream->runtime->hw.buffer_bytes_max = 45000 * 1024; substream->runtime->hw.period_bytes_min = 1; substream->runtime->hw.period_bytes_max = UINT_MAX; substream->runtime->hw.periods_min = 2; substream->runtime->hw.periods_max = UINT_MAX; err = snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_PERIOD_TIME, 1500000 / ua->packets_per_second, UINT_MAX); if (err < 0) return err; err = snd_pcm_hw_constraint_msbits(substream->runtime, 0, 32, 24); return err; } static int capture_pcm_open(struct snd_pcm_substream *substream) { struct ua101 *ua = substream->private_data; int err; ua->capture.substream = substream; err = set_stream_hw(ua, substream, ua->capture.channels); if (err < 0) return err; substream->runtime->hw.fifo_size = DIV_ROUND_CLOSEST(ua->rate, ua->packets_per_second); substream->runtime->delay = substream->runtime->hw.fifo_size; guard(mutex)(&ua->mutex); err = start_usb_capture(ua); if (err >= 0) set_bit(ALSA_CAPTURE_OPEN, &ua->states); return err; } static int playback_pcm_open(struct snd_pcm_substream *substream) { struct ua101 *ua = substream->private_data; int err; ua->playback.substream = substream; err = set_stream_hw(ua, substream, ua->playback.channels); if (err < 0) return err; substream->runtime->hw.fifo_size = DIV_ROUND_CLOSEST(ua->rate * ua->playback.queue_length, ua->packets_per_second); guard(mutex)(&ua->mutex); err = start_usb_capture(ua); if (err < 0) return err; err = start_usb_playback(ua); if (err < 0) { if (!test_bit(ALSA_CAPTURE_OPEN, &ua->states)) stop_usb_capture(ua); return err; } set_bit(ALSA_PLAYBACK_OPEN, &ua->states); return 0; } static int capture_pcm_close(struct snd_pcm_substream 
*substream) { struct ua101 *ua = substream->private_data; guard(mutex)(&ua->mutex); clear_bit(ALSA_CAPTURE_OPEN, &ua->states); if (!test_bit(ALSA_PLAYBACK_OPEN, &ua->states)) stop_usb_capture(ua); return 0; } static int playback_pcm_close(struct snd_pcm_substream *substream) { struct ua101 *ua = substream->private_data; guard(mutex)(&ua->mutex); stop_usb_playback(ua); clear_bit(ALSA_PLAYBACK_OPEN, &ua->states); if (!test_bit(ALSA_CAPTURE_OPEN, &ua->states)) stop_usb_capture(ua); return 0; } static int capture_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params) { struct ua101 *ua = substream->private_data; guard(mutex)(&ua->mutex); return start_usb_capture(ua); } static int playback_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params) { struct ua101 *ua = substream->private_data; int err; guard(mutex)(&ua->mutex); err = start_usb_capture(ua); if (err >= 0) err = start_usb_playback(ua); return err; } static int capture_pcm_prepare(struct snd_pcm_substream *substream) { struct ua101 *ua = substream->private_data; int err; scoped_guard(mutex, &ua->mutex) { err = start_usb_capture(ua); } if (err < 0) return err; /* * The EHCI driver schedules the first packet of an iso stream at 10 ms * in the future, i.e., no data is actually captured for that long. * Take the wait here so that the stream is known to be actually * running when the start trigger has been called. */ wait_event(ua->alsa_capture_wait, test_bit(CAPTURE_URB_COMPLETED, &ua->states) || !test_bit(USB_CAPTURE_RUNNING, &ua->states)); if (test_bit(DISCONNECTED, &ua->states)) return -ENODEV; if (!test_bit(USB_CAPTURE_RUNNING, &ua->states)) return -EIO; ua->capture.period_pos = 0; ua->capture.buffer_pos = 0; return 0; } static int playback_pcm_prepare(struct snd_pcm_substream *substream) { struct ua101 *ua = substream->private_data; int err; scoped_guard(mutex, &ua->mutex) { err = start_usb_capture(ua); if (err >= 0) err = start_usb_playback(ua); } if (err < 0) return err; /* see the comment in capture_pcm_prepare() */ wait_event(ua->alsa_playback_wait, test_bit(PLAYBACK_URB_COMPLETED, &ua->states) || !test_bit(USB_PLAYBACK_RUNNING, &ua->states)); if (test_bit(DISCONNECTED, &ua->states)) return -ENODEV; if (!test_bit(USB_PLAYBACK_RUNNING, &ua->states)) return -EIO; substream->runtime->delay = 0; ua->playback.period_pos = 0; ua->playback.buffer_pos = 0; return 0; } static int capture_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct ua101 *ua = substream->private_data; switch (cmd) { case SNDRV_PCM_TRIGGER_START: if (!test_bit(USB_CAPTURE_RUNNING, &ua->states)) return -EIO; set_bit(ALSA_CAPTURE_RUNNING, &ua->states); return 0; case SNDRV_PCM_TRIGGER_STOP: clear_bit(ALSA_CAPTURE_RUNNING, &ua->states); return 0; default: return -EINVAL; } } static int playback_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct ua101 *ua = substream->private_data; switch (cmd) { case SNDRV_PCM_TRIGGER_START: if (!test_bit(USB_PLAYBACK_RUNNING, &ua->states)) return -EIO; set_bit(ALSA_PLAYBACK_RUNNING, &ua->states); return 0; case SNDRV_PCM_TRIGGER_STOP: clear_bit(ALSA_PLAYBACK_RUNNING, &ua->states); return 0; default: return -EINVAL; } } static inline snd_pcm_uframes_t ua101_pcm_pointer(struct ua101 *ua, struct ua101_stream *stream) { guard(spinlock_irqsave)(&ua->lock); return stream->buffer_pos; } static snd_pcm_uframes_t capture_pcm_pointer(struct snd_pcm_substream *subs) { struct ua101 *ua = subs->private_data; return ua101_pcm_pointer(ua, &ua->capture); } 
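/*
 * Editor's note: both pointer callbacks merely snapshot stream->buffer_pos
 * under ua->lock; the position itself is advanced by the URB completion
 * paths. A minimal sketch of the update those paths perform (names match
 * this driver, body illustrative only):
 *
 *	stream->buffer_pos += frames;
 *	if (stream->buffer_pos >= runtime->buffer_size)
 *		stream->buffer_pos -= runtime->buffer_size;
 */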
static snd_pcm_uframes_t playback_pcm_pointer(struct snd_pcm_substream *subs) { struct ua101 *ua = subs->private_data; return ua101_pcm_pointer(ua, &ua->playback); } static const struct snd_pcm_ops capture_pcm_ops = { .open = capture_pcm_open, .close = capture_pcm_close, .hw_params = capture_pcm_hw_params, .prepare = capture_pcm_prepare, .trigger = capture_pcm_trigger, .pointer = capture_pcm_pointer, }; static const struct snd_pcm_ops playback_pcm_ops = { .open = playback_pcm_open, .close = playback_pcm_close, .hw_params = playback_pcm_hw_params, .prepare = playback_pcm_prepare, .trigger = playback_pcm_trigger, .pointer = playback_pcm_pointer, }; static const struct uac_format_type_i_discrete_descriptor * find_format_descriptor(struct usb_interface *interface) { struct usb_host_interface *alt; u8 *extra; int extralen; if (interface->num_altsetting != 2) { dev_err(&interface->dev, "invalid num_altsetting\n"); return NULL; } alt = &interface->altsetting[0]; if (alt->desc.bNumEndpoints != 0) { dev_err(&interface->dev, "invalid bNumEndpoints\n"); return NULL; } alt = &interface->altsetting[1]; if (alt->desc.bNumEndpoints != 1) { dev_err(&interface->dev, "invalid bNumEndpoints\n"); return NULL; } extra = alt->extra; extralen = alt->extralen; while (extralen >= sizeof(struct usb_descriptor_header)) { struct uac_format_type_i_discrete_descriptor *desc; desc = (struct uac_format_type_i_discrete_descriptor *)extra; if (desc->bLength > extralen) { dev_err(&interface->dev, "descriptor overflow\n"); return NULL; } if (desc->bLength == UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1) && desc->bDescriptorType == USB_DT_CS_INTERFACE && desc->bDescriptorSubtype == UAC_FORMAT_TYPE) { if (desc->bFormatType != UAC_FORMAT_TYPE_I_PCM || desc->bSamFreqType != 1) { dev_err(&interface->dev, "invalid format type\n"); return NULL; } return desc; } extralen -= desc->bLength; extra += desc->bLength; } dev_err(&interface->dev, "sample format descriptor not found\n"); return NULL; } static int detect_usb_format(struct ua101 *ua) { const struct uac_format_type_i_discrete_descriptor *fmt_capture; const struct uac_format_type_i_discrete_descriptor *fmt_playback; const struct usb_endpoint_descriptor *epd; unsigned int rate2; fmt_capture = find_format_descriptor(ua->intf[INTF_CAPTURE]); fmt_playback = find_format_descriptor(ua->intf[INTF_PLAYBACK]); if (!fmt_capture || !fmt_playback) return -ENXIO; switch (fmt_capture->bSubframeSize) { case 3: ua->format_bit = SNDRV_PCM_FMTBIT_S24_3LE; break; case 4: ua->format_bit = SNDRV_PCM_FMTBIT_S32_LE; break; default: dev_err(&ua->dev->dev, "sample width is not 24 or 32 bits\n"); return -ENXIO; } if (fmt_capture->bSubframeSize != fmt_playback->bSubframeSize) { dev_err(&ua->dev->dev, "playback/capture sample widths do not match\n"); return -ENXIO; } if (fmt_capture->bBitResolution != 24 || fmt_playback->bBitResolution != 24) { dev_err(&ua->dev->dev, "sample width is not 24 bits\n"); return -ENXIO; } ua->rate = combine_triple(fmt_capture->tSamFreq[0]); rate2 = combine_triple(fmt_playback->tSamFreq[0]); if (ua->rate != rate2) { dev_err(&ua->dev->dev, "playback/capture rates do not match: %u/%u\n", rate2, ua->rate); return -ENXIO; } switch (ua->dev->speed) { case USB_SPEED_FULL: ua->packets_per_second = 1000; break; case USB_SPEED_HIGH: ua->packets_per_second = 8000; break; default: dev_err(&ua->dev->dev, "unknown device speed\n"); return -ENXIO; } ua->capture.channels = fmt_capture->bNrChannels; ua->playback.channels = fmt_playback->bNrChannels; ua->capture.frame_bytes = 
fmt_capture->bSubframeSize * ua->capture.channels; ua->playback.frame_bytes = fmt_playback->bSubframeSize * ua->playback.channels; epd = &ua->intf[INTF_CAPTURE]->altsetting[1].endpoint[0].desc; if (!usb_endpoint_is_isoc_in(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid capture endpoint\n"); return -ENXIO; } ua->capture.usb_pipe = usb_rcvisocpipe(ua->dev, usb_endpoint_num(epd)); ua->capture.max_packet_bytes = usb_endpoint_maxp(epd); epd = &ua->intf[INTF_PLAYBACK]->altsetting[1].endpoint[0].desc; if (!usb_endpoint_is_isoc_out(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid playback endpoint\n"); return -ENXIO; } ua->playback.usb_pipe = usb_sndisocpipe(ua->dev, usb_endpoint_num(epd)); ua->playback.max_packet_bytes = usb_endpoint_maxp(epd); return 0; } static int alloc_stream_buffers(struct ua101 *ua, struct ua101_stream *stream) { unsigned int remaining_packets, packets, packets_per_page, i; size_t size; stream->queue_length = queue_length; stream->queue_length = max(stream->queue_length, (unsigned int)MIN_QUEUE_LENGTH); stream->queue_length = min(stream->queue_length, (unsigned int)MAX_QUEUE_LENGTH); /* * The cache pool sizes used by usb_alloc_coherent() (128, 512, 2048) are * quite bad when used with the packet sizes of this device (e.g. 280, * 520, 624). Therefore, we allocate and subdivide entire pages, using * a smaller buffer only for the last chunk. */ remaining_packets = stream->queue_length; packets_per_page = PAGE_SIZE / stream->max_packet_bytes; for (i = 0; i < ARRAY_SIZE(stream->buffers); ++i) { packets = min(remaining_packets, packets_per_page); size = packets * stream->max_packet_bytes; stream->buffers[i].addr = usb_alloc_coherent(ua->dev, size, GFP_KERNEL, &stream->buffers[i].dma); if (!stream->buffers[i].addr) return -ENOMEM; stream->buffers[i].size = size; remaining_packets -= packets; if (!remaining_packets) break; } if (remaining_packets) { dev_err(&ua->dev->dev, "too many packets\n"); return -ENXIO; } return 0; } static void free_stream_buffers(struct ua101 *ua, struct ua101_stream *stream) { unsigned int i; for (i = 0; i < ARRAY_SIZE(stream->buffers); ++i) usb_free_coherent(ua->dev, stream->buffers[i].size, stream->buffers[i].addr, stream->buffers[i].dma); } static int alloc_stream_urbs(struct ua101 *ua, struct ua101_stream *stream, void (*urb_complete)(struct urb *)) { unsigned max_packet_size = stream->max_packet_bytes; struct ua101_urb *urb; unsigned int b, u = 0; for (b = 0; b < ARRAY_SIZE(stream->buffers); ++b) { unsigned int size = stream->buffers[b].size; u8 *addr = stream->buffers[b].addr; dma_addr_t dma = stream->buffers[b].dma; while (size >= max_packet_size) { if (u >= stream->queue_length) goto bufsize_error; urb = kmalloc(sizeof(*urb), GFP_KERNEL); if (!urb) return -ENOMEM; usb_init_urb(&urb->urb); urb->urb.dev = ua->dev; urb->urb.pipe = stream->usb_pipe; urb->urb.transfer_flags = URB_NO_TRANSFER_DMA_MAP; urb->urb.transfer_buffer = addr; urb->urb.transfer_dma = dma; urb->urb.transfer_buffer_length = max_packet_size; urb->urb.number_of_packets = 1; urb->urb.interval = 1; urb->urb.context = ua; urb->urb.complete = urb_complete; urb->urb.iso_frame_desc[0].offset = 0; urb->urb.iso_frame_desc[0].length = max_packet_size; stream->urbs[u++] = urb; size -= max_packet_size; addr += max_packet_size; dma += max_packet_size; } } if (u == stream->queue_length) return 0; bufsize_error: dev_err(&ua->dev->dev, "internal buffer size error\n"); return -ENXIO; } static void free_stream_urbs(struct ua101_stream *stream) { unsigned 
int i; for (i = 0; i < stream->queue_length; ++i) { kfree(stream->urbs[i]); stream->urbs[i] = NULL; } } static void free_usb_related_resources(struct ua101 *ua, struct usb_interface *interface) { unsigned int i; struct usb_interface *intf; scoped_guard(mutex, &ua->mutex) { free_stream_urbs(&ua->capture); free_stream_urbs(&ua->playback); } free_stream_buffers(ua, &ua->capture); free_stream_buffers(ua, &ua->playback); for (i = 0; i < ARRAY_SIZE(ua->intf); ++i) { scoped_guard(mutex, &ua->mutex) { intf = ua->intf[i]; ua->intf[i] = NULL; } if (intf) { usb_set_intfdata(intf, NULL); if (intf != interface) usb_driver_release_interface(&ua101_driver, intf); } } } static void ua101_card_free(struct snd_card *card) { struct ua101 *ua = card->private_data; mutex_destroy(&ua->mutex); } static int ua101_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { static const struct snd_usb_midi_endpoint_info midi_ep = { .out_cables = 0x0001, .in_cables = 0x0001 }; static const struct snd_usb_audio_quirk midi_quirk = { .type = QUIRK_MIDI_FIXED_ENDPOINT, .data = &midi_ep }; static const int intf_numbers[2][3] = { { /* UA-101 */ [INTF_PLAYBACK] = 0, [INTF_CAPTURE] = 1, [INTF_MIDI] = 2, }, { /* UA-1000 */ [INTF_CAPTURE] = 1, [INTF_PLAYBACK] = 2, [INTF_MIDI] = 3, }, }; struct snd_card *card; struct ua101 *ua; unsigned int card_index, i; int is_ua1000; const char *name; char usb_path[32]; int err; is_ua1000 = usb_id->idProduct == 0x0044; if (interface->altsetting->desc.bInterfaceNumber != intf_numbers[is_ua1000][0]) return -ENODEV; guard(mutex)(&devices_mutex); for (card_index = 0; card_index < SNDRV_CARDS; ++card_index) if (enable[card_index] && !(devices_used & (1 << card_index))) break; if (card_index >= SNDRV_CARDS) return -ENOENT; err = snd_card_new(&interface->dev, index[card_index], id[card_index], THIS_MODULE, sizeof(*ua), &card); if (err < 0) return err; card->private_free = ua101_card_free; ua = card->private_data; ua->dev = interface_to_usbdev(interface); ua->card = card; ua->card_index = card_index; INIT_LIST_HEAD(&ua->midi_list); spin_lock_init(&ua->lock); mutex_init(&ua->mutex); INIT_LIST_HEAD(&ua->ready_playback_urbs); INIT_WORK(&ua->playback_work, playback_work); init_waitqueue_head(&ua->alsa_capture_wait); init_waitqueue_head(&ua->rate_feedback_wait); init_waitqueue_head(&ua->alsa_playback_wait); ua->intf[0] = interface; for (i = 1; i < ARRAY_SIZE(ua->intf); ++i) { ua->intf[i] = usb_ifnum_to_if(ua->dev, intf_numbers[is_ua1000][i]); if (!ua->intf[i]) { dev_err(&ua->dev->dev, "interface %u not found\n", intf_numbers[is_ua1000][i]); err = -ENXIO; goto probe_error; } err = usb_driver_claim_interface(&ua101_driver, ua->intf[i], ua); if (err < 0) { ua->intf[i] = NULL; err = -EBUSY; goto probe_error; } } err = detect_usb_format(ua); if (err < 0) goto probe_error; name = usb_id->idProduct == 0x0044 ? "UA-1000" : "UA-101"; strscpy(card->driver, "UA-101"); strscpy(card->shortname, name); usb_make_path(ua->dev, usb_path, sizeof(usb_path)); snprintf(ua->card->longname, sizeof(ua->card->longname), "EDIROL %s (serial %s), %u Hz at %s, %s speed", name, ua->dev->serial ? ua->dev->serial : "?", ua->rate, usb_path, ua->dev->speed == USB_SPEED_HIGH ? 
"high" : "full"); err = alloc_stream_buffers(ua, &ua->capture); if (err < 0) goto probe_error; err = alloc_stream_buffers(ua, &ua->playback); if (err < 0) goto probe_error; err = alloc_stream_urbs(ua, &ua->capture, capture_urb_complete); if (err < 0) goto probe_error; err = alloc_stream_urbs(ua, &ua->playback, playback_urb_complete); if (err < 0) goto probe_error; err = snd_pcm_new(card, name, 0, 1, 1, &ua->pcm); if (err < 0) goto probe_error; ua->pcm->private_data = ua; strscpy(ua->pcm->name, name); snd_pcm_set_ops(ua->pcm, SNDRV_PCM_STREAM_PLAYBACK, &playback_pcm_ops); snd_pcm_set_ops(ua->pcm, SNDRV_PCM_STREAM_CAPTURE, &capture_pcm_ops); snd_pcm_set_managed_buffer_all(ua->pcm, SNDRV_DMA_TYPE_VMALLOC, NULL, 0, 0); err = snd_usbmidi_create(card, ua->intf[INTF_MIDI], &ua->midi_list, &midi_quirk); if (err < 0) goto probe_error; err = snd_card_register(card); if (err < 0) goto probe_error; usb_set_intfdata(interface, ua); devices_used |= 1 << card_index; return 0; probe_error: free_usb_related_resources(ua, interface); snd_card_free(card); return err; } static void ua101_disconnect(struct usb_interface *interface) { struct ua101 *ua = usb_get_intfdata(interface); struct list_head *midi; if (!ua) return; guard(mutex)(&devices_mutex); set_bit(DISCONNECTED, &ua->states); wake_up(&ua->rate_feedback_wait); /* make sure that userspace cannot create new requests */ snd_card_disconnect(ua->card); /* make sure that there are no pending USB requests */ list_for_each(midi, &ua->midi_list) snd_usbmidi_disconnect(midi); abort_alsa_playback(ua); abort_alsa_capture(ua); scoped_guard(mutex, &ua->mutex) { stop_usb_playback(ua); stop_usb_capture(ua); } free_usb_related_resources(ua, interface); devices_used &= ~(1 << ua->card_index); snd_card_free_when_closed(ua->card); } static const struct usb_device_id ua101_ids[] = { { USB_DEVICE(0x0582, 0x0044) }, /* UA-1000 high speed */ { USB_DEVICE(0x0582, 0x007d) }, /* UA-101 high speed */ { USB_DEVICE(0x0582, 0x008d) }, /* UA-101 full speed */ { } }; MODULE_DEVICE_TABLE(usb, ua101_ids); static struct usb_driver ua101_driver = { .name = "snd-ua101", .id_table = ua101_ids, .probe = ua101_probe, .disconnect = ua101_disconnect, #if 0 .suspend = ua101_suspend, .resume = ua101_resume, #endif }; module_usb_driver(ua101_driver); |
/* RFCOMM implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* * RFCOMM sockets.
*/ #include <linux/compat.h> #include <linux/export.h> #include <linux/debugfs.h> #include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/rfcomm.h> static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { .lock = __RW_LOCK_UNLOCKED(rfcomm_sk_list.lock) }; static void rfcomm_sock_close(struct sock *sk); static void rfcomm_sock_kill(struct sock *sk); /* ---- DLC callbacks ---- * * called under rfcomm_dlc_lock() */ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb) { struct sock *sk = d->owner; if (!sk) return; atomic_add(skb->len, &sk->sk_rmem_alloc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) rfcomm_dlc_throttle(d); } static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) { struct sock *sk = d->owner, *parent; if (!sk) return; BT_DBG("dlc %p state %ld err %d", d, d->state, err); lock_sock(sk); if (err) sk->sk_err = err; sk->sk_state = d->state; parent = bt_sk(sk)->parent; if (parent) { if (d->state == BT_CLOSED) { sock_set_flag(sk, SOCK_ZAPPED); bt_accept_unlink(sk); } parent->sk_data_ready(parent); } else { if (d->state == BT_CONNECTED) rfcomm_session_getaddr(d->session, &rfcomm_pi(sk)->src, NULL); sk->sk_state_change(sk); } release_sock(sk); if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop the DLC lock here, otherwise * rfcomm_sock_destruct() will deadlock. */ rfcomm_dlc_unlock(d); rfcomm_sock_kill(sk); rfcomm_dlc_lock(d); } } /* ---- Socket functions ---- */ static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; sk_for_each(sk, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel != channel) continue; if (bacmp(&rfcomm_pi(sk)->src, src)) continue; if (sk->sk_state == BT_BOUND || sk->sk_state == BT_LISTEN) break; } return sk; } /* Find a socket with the given channel and source bdaddr. * Returns the closest match. */ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; if (rfcomm_pi(sk)->channel == channel) { /* Exact match. */ if (!bacmp(&rfcomm_pi(sk)->src, src)) break; /* Closest match */ if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) sk1 = sk; } } read_unlock(&rfcomm_sk_list.lock); return sk ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) { struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; BT_DBG("sk %p dlc %p", sk, d); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); rfcomm_dlc_lock(d); rfcomm_pi(sk)->dlc = NULL; /* Detach DLC if it's owned by this socket */ if (d->owner == sk) d->owner = NULL; rfcomm_dlc_unlock(d); rfcomm_dlc_put(d); } static void rfcomm_sock_cleanup_listen(struct sock *parent) { struct sock *sk; BT_DBG("parent %p", parent); /* Close not-yet-accepted DLCs */ while ((sk = bt_accept_dequeue(parent, NULL))) { rfcomm_sock_close(sk); rfcomm_sock_kill(sk); } parent->sk_state = BT_CLOSED; sock_set_flag(parent, SOCK_ZAPPED); } /* Kill socket (only if zapped and orphan) * Must be called on unlocked socket.
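 * Editor's note: "zapped" means SOCK_ZAPPED is set (the underlying DLC is
 * gone) and "orphan" means sk->sk_socket is NULL (no user-space handle
 * left); only when both hold does the function below unlink the socket
 * from rfcomm_sk_list and drop the final reference.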
*/ static void rfcomm_sock_kill(struct sock *sk) { if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* Kill poor orphan */ bt_sock_unlink(&rfcomm_sk_list, sk); sock_set_flag(sk, SOCK_DEAD); sock_put(sk); } static void __rfcomm_sock_close(struct sock *sk) { struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); switch (sk->sk_state) { case BT_LISTEN: rfcomm_sock_cleanup_listen(sk); break; case BT_CONNECT: case BT_CONNECT2: case BT_CONFIG: case BT_CONNECTED: rfcomm_dlc_close(d, 0); fallthrough; default: sock_set_flag(sk, SOCK_ZAPPED); break; } } /* Close socket. * Must be called on unlocked socket. */ static void rfcomm_sock_close(struct sock *sk) { lock_sock(sk); __rfcomm_sock_close(sk); release_sock(sk); } static void rfcomm_sock_init(struct sock *sk, struct sock *parent) { struct rfcomm_pinfo *pi = rfcomm_pi(sk); BT_DBG("sk %p", sk); if (parent) { sk->sk_type = parent->sk_type; pi->dlc->defer_setup = test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags); pi->sec_level = rfcomm_pi(parent)->sec_level; pi->role_switch = rfcomm_pi(parent)->role_switch; security_sk_clone(parent, sk); } else { pi->dlc->defer_setup = 0; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; } pi->dlc->sec_level = pi->sec_level; pi->dlc->role_switch = pi->role_switch; } static struct proto rfcomm_proto = { .name = "RFCOMM", .owner = THIS_MODULE, .obj_size = sizeof(struct rfcomm_pinfo) }; static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; d = rfcomm_dlc_alloc(prio); if (!d) return NULL; sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); if (!sk) { rfcomm_dlc_free(d); return NULL; } d->data_ready = rfcomm_sk_data_ready; d->state_change = rfcomm_sk_state_change; rfcomm_pi(sk)->dlc = d; d->owner = sk; sk->sk_destruct = rfcomm_sock_destruct; sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT; sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; bt_sock_link(&rfcomm_sk_list, sk); BT_DBG("sk %p", sk); return sk; } static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); sock->state = SS_UNCONNECTED; if (sock->type != SOCK_STREAM && sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->ops = &rfcomm_sock_ops; sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; rfcomm_sock_init(sk, NULL); return 0; } static int rfcomm_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_rc sa; struct sock *sk = sock->sk; int len, err = 0; if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&sa, 0, sizeof(sa)); len = min_t(unsigned int, sizeof(sa), addr_len); memcpy(&sa, addr, len); BT_DBG("sk %p %pMR", sk, &sa.rc_bdaddr); lock_sock(sk); if (sk->sk_state != BT_OPEN) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } write_lock(&rfcomm_sk_list.lock); if (sa.rc_channel && __rfcomm_get_listen_sock_by_addr(sa.rc_channel, &sa.rc_bdaddr)) { err = -EADDRINUSE; } else { /* Save source address */ bacpy(&rfcomm_pi(sk)->src, &sa.rc_bdaddr); rfcomm_pi(sk)->channel = sa.rc_channel; sk->sk_state = BT_BOUND; } write_unlock(&rfcomm_sk_list.lock); done: release_sock(sk); return 
err; } static int rfcomm_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; int err = 0; BT_DBG("sk %p", sk); if (alen < sizeof(struct sockaddr_rc) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; sock_hold(sk); lock_sock(sk); if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } sk->sk_state = BT_CONNECT; bacpy(&rfcomm_pi(sk)->dst, &sa->rc_bdaddr); rfcomm_pi(sk)->channel = sa->rc_channel; d->sec_level = rfcomm_pi(sk)->sec_level; d->role_switch = rfcomm_pi(sk)->role_switch; /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */ release_sock(sk); err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr, sa->rc_channel); lock_sock(sk); if (!err && !sock_flag(sk, SOCK_ZAPPED)) err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); done: release_sock(sk); sock_put(sk); return err; } static int rfcomm_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sk %p backlog %d", sk, backlog); lock_sock(sk); if (sk->sk_state != BT_BOUND) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } if (!rfcomm_pi(sk)->channel) { bdaddr_t *src = &rfcomm_pi(sk)->src; u8 channel; err = -EINVAL; write_lock(&rfcomm_sk_list.lock); for (channel = 1; channel < 31; channel++) if (!__rfcomm_get_listen_sock_by_addr(channel, src)) { rfcomm_pi(sk)->channel = channel; err = 0; break; } write_unlock(&rfcomm_sk_list.lock); if (err < 0) goto done; } sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = BT_LISTEN; done: release_sock(sk); return err; } static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). 
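 * Editor's note: "wake-one" refers to the add_wait_queue_exclusive() call
 * that follows; of possibly many threads blocked in accept(), only one is
 * woken per incoming connection, which avoids a thundering herd.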
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; } nsk = bt_accept_dequeue(sk, newsock); if (nsk) break; if (!timeo) { err = -EAGAIN; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; newsock->state = SS_CONNECTED; BT_DBG("new socket %p", nsk); done: release_sock(sk); return err; } static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; BT_DBG("sock %p, sk %p", sock, sk); if (peer && sk->sk_state != BT_CONNECTED && sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2) return -ENOTCONN; memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->dst); else bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src); return sizeof(struct sockaddr_rc); } static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; struct sk_buff *skb; int sent; if (test_bit(RFCOMM_DEFER_SETUP, &d->flags)) return -ENOTCONN; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; BT_DBG("sock %p, sk %p", sock, sk); lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); release_sock(sk); if (sent) return sent; skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, RFCOMM_SKB_TAIL_RESERVE); if (IS_ERR(skb)) return PTR_ERR(skb); sent = rfcomm_dlc_send(d, skb); if (sent < 0) kfree_skb(skb); return sent; } static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; int len; if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); return 0; } len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) atomic_sub(len, &sk->sk_rmem_alloc); if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2)) rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc); release_sock(sk); return len; } static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err = 0; u32 opt; BT_DBG("sk %p", sk); lock_sock(sk); switch (optname) { case RFCOMM_LM: err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; if (opt & RFCOMM_LM_FIPS) { err = -EINVAL; break; } if (opt & RFCOMM_LM_AUTH) rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW; if (opt & RFCOMM_LM_ENCRYPT) rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM; if (opt & RFCOMM_LM_SECURE) rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH; rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER); break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct bt_security sec; int err = 0; u32 opt; BT_DBG("sk %p", sk); if (level == SOL_RFCOMM) return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SECURITY: if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; break; } 
sec.level = BT_SECURITY_LOW; err = copy_safe_from_sockptr(&sec, sizeof(sec), optval, optlen); if (err) break; if (sec.level > BT_SECURITY_HIGH) { err = -EINVAL; break; } rfcomm_pi(sk)->sec_level = sec.level; break; case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; break; } err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); else clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sock *l2cap_sk; struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; int err = 0; size_t len; u32 opt; BT_DBG("sk %p", sk); if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); switch (optname) { case RFCOMM_LM: switch (rfcomm_pi(sk)->sec_level) { case BT_SECURITY_LOW: opt = RFCOMM_LM_AUTH; break; case BT_SECURITY_MEDIUM: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT; break; case BT_SECURITY_HIGH: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE; break; case BT_SECURITY_FIPS: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE | RFCOMM_LM_FIPS; break; default: opt = 0; break; } if (rfcomm_pi(sk)->role_switch) opt |= RFCOMM_LM_MASTER; if (put_user(opt, (u32 __user *) optval)) err = -EFAULT; break; case RFCOMM_CONNINFO: if (sk->sk_state != BT_CONNECTED && !rfcomm_pi(sk)->dlc->defer_setup) { err = -ENOTCONN; break; } l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; conn = l2cap_pi(l2cap_sk)->chan->conn; memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct bt_security sec; int err = 0; size_t len; BT_DBG("sk %p", sk); if (level == SOL_RFCOMM) return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); switch (optname) { case BT_SECURITY: if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; break; } sec.level = rfcomm_pi(sk)->sec_level; sec.key_size = 0; len = min(len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) err = -EFAULT; break; case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; break; } if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), (u32 __user *) optval)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk __maybe_unused = sock->sk; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); err = bt_sock_ioctl(sock, cmd, arg); if (err == -ENOIOCTLCMD) { #ifdef CONFIG_BT_RFCOMM_TTY err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg); #else err = -EOPNOTSUPP; #endif } return err; } #ifdef CONFIG_COMPAT static int rfcomm_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return rfcomm_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static int rfcomm_sock_shutdown(struct socket *sock, int 
how) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sock %p, sk %p", sock, sk); if (!sk) return 0; lock_sock(sk); if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; release_sock(sk); __rfcomm_sock_close(sk); lock_sock(sk); if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } release_sock(sk); return err; } static int rfcomm_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; BT_DBG("sock %p, sk %p", sock, sk); if (!sk) return 0; err = rfcomm_sock_shutdown(sock, 2); sock_orphan(sk); rfcomm_sock_kill(sk); return err; } /* ---- RFCOMM core layer callbacks ---- * * called under rfcomm_lock() */ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc **d) { struct sock *sk, *parent; bdaddr_t src, dst; int result = 0; BT_DBG("session %p channel %d", s, channel); rfcomm_session_getaddr(s, &src, &dst); /* Check if we have socket listening on channel */ parent = rfcomm_get_sock_by_channel(BT_LISTEN, channel, &src); if (!parent) return 0; lock_sock(parent); /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); goto done; } sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM); rfcomm_sock_init(sk, parent); bacpy(&rfcomm_pi(sk)->src, &src); bacpy(&rfcomm_pi(sk)->dst, &dst); rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; result = 1; done: release_sock(parent); if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) parent->sk_state_change(parent); return result; } static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &rfcomm_pi(sk)->src, &rfcomm_pi(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); } read_unlock(&rfcomm_sk_list.lock); return 0; } DEFINE_SHOW_ATTRIBUTE(rfcomm_sock_debugfs); static struct dentry *rfcomm_sock_debugfs; static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = rfcomm_sock_release, .bind = rfcomm_sock_bind, .connect = rfcomm_sock_connect, .listen = rfcomm_sock_listen, .accept = rfcomm_sock_accept, .getname = rfcomm_sock_getname, .sendmsg = rfcomm_sock_sendmsg, .recvmsg = rfcomm_sock_recvmsg, .shutdown = rfcomm_sock_shutdown, .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, .gettstamp = sock_gettstamp, .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = rfcomm_sock_compat_ioctl, #endif }; static const struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = rfcomm_sock_create }; int __init rfcomm_init_sockets(void) { int err; BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops); if (err < 0) { BT_ERR("RFCOMM socket layer registration failed"); goto error; } err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create RFCOMM proc file"); bt_sock_unregister(BTPROTO_RFCOMM); goto error; } BT_INFO("RFCOMM socket layer initialized"); if 
(IS_ERR_OR_NULL(bt_debugfs)) return 0; rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, bt_debugfs, NULL, &rfcomm_sock_debugfs_fops); return 0; error: proto_unregister(&rfcomm_proto); return err; } void __exit rfcomm_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "rfcomm"); debugfs_remove(rfcomm_sock_debugfs); bt_sock_unregister(BTPROTO_RFCOMM); proto_unregister(&rfcomm_proto); }
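/*
 * Editor's note: a minimal user-space sketch of the server side of the
 * socket API implemented above (illustrative only; error handling omitted,
 * types and constants from the Bluetooth userspace headers):
 *
 *	struct sockaddr_rc sa = { 0 };
 *	int fd = socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);
 *
 *	sa.rc_family = AF_BLUETOOTH;
 *	sa.rc_channel = 1;	// rc_bdaddr left zeroed == BDADDR_ANY
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(fd, 1);
 *	// accept(), then read()/write() as on any stream socket
 */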
// SPDX-License-Identifier: GPL-2.0+ /* * Support for emulating SAT (ata pass through) on devices based * on the Cypress USB/ATA bridge supporting ATACB. * * Copyright (c) 2008 Matthieu Castet (castet.matthieu@free.fr) */ #include <linux/module.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_eh.h> #include <linux/ata.h> #include "usb.h" #include "protocol.h" #include "scsiglue.h" #include "debug.h" #define DRV_NAME "ums-cypress" MODULE_DESCRIPTION("SAT support for Cypress USB/ATA bridges with ATACB"); MODULE_AUTHOR("Matthieu Castet <castet.matthieu@free.fr>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id cypress_usb_ids[] = { # include "unusual_cypress.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, cypress_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev cypress_unusual_dev_list[] = { # include "unusual_cypress.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* * ATACB is a protocol used on the Cypress USB<->ATA bridge to * send raw ATA commands over mass storage. * There is an ATACB2 protocol that supports LBA48 on newer chips. * More info can be found in the cy7c68310_8.pdf and cy7c68300c_8.pdf * datasheets from cypress.com.
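 * Editor's note (a summary of the code below, not taken from the
 * datasheets): the pass-through CDB built by cypress_atacb_passthrough()
 * is 16 bytes; bytes 0 and 1 carry the vendor signature and sub-command
 * (both 0x24 by default), byte 3 is a register-valid mask, byte 4 the
 * transfer block count, and bytes 6..12 carry the ATA taskfile (features,
 * sector count, LBA low/mid/high, device, command).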
*/ static void cypress_atacb_passthrough(struct scsi_cmnd *srb, struct us_data *us) { unsigned char save_cmnd[MAX_COMMAND_SIZE]; if (likely(srb->cmnd[0] != ATA_16 && srb->cmnd[0] != ATA_12)) { usb_stor_transparent_scsi_command(srb, us); return; } memcpy(save_cmnd, srb->cmnd, sizeof(save_cmnd)); memset(srb->cmnd, 0, MAX_COMMAND_SIZE); /* check if we support the command */ if (save_cmnd[1] >> 5) /* MULTIPLE_COUNT */ goto invalid_fld; /* check protocol */ switch ((save_cmnd[1] >> 1) & 0xf) { case 3: /* no DATA */ case 4: /* PIO in */ case 5: /* PIO out */ break; default: goto invalid_fld; } /* first build the ATACB command */ srb->cmd_len = 16; srb->cmnd[0] = 0x24; /* * bVSCBSignature: vendor-specific command; * this value can change, but most (all?) manufacturers * keep the Cypress default: 0x24 */ srb->cmnd[1] = 0x24; /* bVSCBSubCommand: 0x24 for ATACB */ srb->cmnd[3] = 0xff - 1; /* * features, sector count, lba low, lba med, * lba high, device, command are valid */ srb->cmnd[4] = 1; /* TransferBlockCount: 512 */ if (save_cmnd[0] == ATA_16) { srb->cmnd[ 6] = save_cmnd[ 4]; /* features */ srb->cmnd[ 7] = save_cmnd[ 6]; /* sector count */ srb->cmnd[ 8] = save_cmnd[ 8]; /* lba low */ srb->cmnd[ 9] = save_cmnd[10]; /* lba med */ srb->cmnd[10] = save_cmnd[12]; /* lba high */ srb->cmnd[11] = save_cmnd[13]; /* device */ srb->cmnd[12] = save_cmnd[14]; /* command */ if (save_cmnd[1] & 0x01) { /* extended bit set for LBA48 */ /* this could be supported by atacb2 */ if (save_cmnd[3] || save_cmnd[5] || save_cmnd[7] || save_cmnd[9] || save_cmnd[11]) goto invalid_fld; } } else { /* ATA12 */ srb->cmnd[ 6] = save_cmnd[3]; /* features */ srb->cmnd[ 7] = save_cmnd[4]; /* sector count */ srb->cmnd[ 8] = save_cmnd[5]; /* lba low */ srb->cmnd[ 9] = save_cmnd[6]; /* lba med */ srb->cmnd[10] = save_cmnd[7]; /* lba high */ srb->cmnd[11] = save_cmnd[8]; /* device */ srb->cmnd[12] = save_cmnd[9]; /* command */ } /* Filter SET_FEATURES - XFER MODE command */ if ((srb->cmnd[12] == ATA_CMD_SET_FEATURES) && (srb->cmnd[6] == SETFEATURES_XFER)) goto invalid_fld; if (srb->cmnd[12] == ATA_CMD_ID_ATA || srb->cmnd[12] == ATA_CMD_ID_ATAPI) srb->cmnd[2] |= (1<<7); /* set IdentifyPacketDevice for these cmds */ usb_stor_transparent_scsi_command(srb, us); /* if the device doesn't support ATACB */ if (srb->result == SAM_STAT_CHECK_CONDITION && memcmp(srb->sense_buffer, usb_stor_sense_invalidCDB, sizeof(usb_stor_sense_invalidCDB)) == 0) { usb_stor_dbg(us, "cypress atacb not supported ???\n"); goto end; } /* * if the ck_cond flag is set and there wasn't a critical error, * build the special sense */ if ((srb->result != (DID_ERROR << 16) && srb->result != (DID_ABORT << 16)) && save_cmnd[2] & 0x20) { struct scsi_eh_save ses; unsigned char regs[8]; unsigned char *sb = srb->sense_buffer; unsigned char *desc = sb + 8; int tmp_result; /* build the command for reading the ATA registers */ scsi_eh_prep_cmnd(srb, &ses, NULL, 0, sizeof(regs)); /* * we use the same command as before, but set * the read-taskfile bit so that the ATACB command is not * executed and the registers selected in srb->cmnd[4] are * read instead */ srb->cmd_len = 16; srb->cmnd[2] = 1; usb_stor_transparent_scsi_command(srb, us); memcpy(regs, srb->sense_buffer, sizeof(regs)); tmp_result = srb->result; scsi_eh_restore_cmnd(srb, &ses); /* if we failed to get the registers, report an invalid command */ if (tmp_result != SAM_STAT_GOOD) goto invalid_fld; /* build the sense */ memset(sb, 0, SCSI_SENSE_BUFFERSIZE); /* set sk, asc for a good command */ sb[1] = RECOVERED_ERROR; sb[2] = 0; /* ATA PASS THROUGH
INFORMATION AVAILABLE */ sb[3] = 0x1D; /* * XXX we should generate sk, asc, ascq from status and error * regs * (see 11.1 Error translation ATA device error to SCSI error * map, and ata_to_sense_error from libata.) */ /* Sense data is current and format is descriptor. */ sb[0] = 0x72; desc[0] = 0x09; /* ATA_RETURN_DESCRIPTOR */ /* set length of additional sense data */ sb[7] = 14; desc[1] = 12; /* Copy registers into sense buffer. */ desc[ 2] = 0x00; desc[ 3] = regs[1]; /* features */ desc[ 5] = regs[2]; /* sector count */ desc[ 7] = regs[3]; /* lba low */ desc[ 9] = regs[4]; /* lba med */ desc[11] = regs[5]; /* lba high */ desc[12] = regs[6]; /* device */ desc[13] = regs[7]; /* command */ srb->result = SAM_STAT_CHECK_CONDITION; } goto end; invalid_fld: srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, usb_stor_sense_invalidCDB, sizeof(usb_stor_sense_invalidCDB)); end: memcpy(srb->cmnd, save_cmnd, sizeof(save_cmnd)); if (srb->cmnd[0] == ATA_12) srb->cmd_len = 12; } static struct scsi_host_template cypress_host_template; static int cypress_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; struct usb_device *device; result = usb_stor_probe1(&us, intf, id, (id - cypress_usb_ids) + cypress_unusual_dev_list, &cypress_host_template); if (result) return result; /* * Among CY7C68300 chips, the A revision does not support Cypress ATACB. * Filter out this revision from EEPROM default descriptor values */ device = interface_to_usbdev(intf); if (device->descriptor.iManufacturer != 0x38 || device->descriptor.iProduct != 0x4e || device->descriptor.iSerialNumber != 0x64) { us->protocol_name = "Transparent SCSI with Cypress ATACB"; us->proto_handler = cypress_atacb_passthrough; } else { us->protocol_name = "Transparent SCSI"; us->proto_handler = usb_stor_transparent_scsi_command; } result = usb_stor_probe2(us); return result; } static struct usb_driver cypress_driver = { .name = DRV_NAME, .probe = cypress_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = cypress_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(cypress_driver, cypress_host_template, DRV_NAME);
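/*
 * Editor's note: one way to exercise the pass-through path above from
 * user space is sg_raw (from sg3_utils) with an ATA_12 (0xa1) CDB, e.g.
 * a PIO data-in IDENTIFY DEVICE (illustrative invocation; the device
 * node is hypothetical):
 *
 *	sg_raw -r 512 /dev/sdX a1 08 0e 00 01 00 00 00 00 ec 00 00
 *
 * Byte 1 selects the PIO data-in protocol (4 << 1), byte 2 (0x0e) marks
 * a block transfer from the device sized by the sector count field, and
 * byte 9 is the ATA command (0xec, IDENTIFY DEVICE).
 */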
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");

struct iommufd_attach {
	struct iommufd_hw_pagetable *hwpt;
	struct xarray device_array;
};

static void iommufd_group_release(struct kref *kref)
{
	struct iommufd_group *igroup =
		container_of(kref, struct iommufd_group, ref);

	WARN_ON(!xa_empty(&igroup->pasid_attach));

	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group),
		   igroup, NULL, GFP_KERNEL);
	iommu_group_put(igroup->group);
	mutex_destroy(&igroup->lock);
	kfree(igroup);
}

static void iommufd_put_group(struct iommufd_group *group)
{
	kref_put(&group->ref, iommufd_group_release);
}

static bool iommufd_group_try_get(struct iommufd_group *igroup,
				  struct iommu_group *group)
{
	if (!igroup)
		return false;
	/*
	 * group ID's cannot be re-used until the group is put back which does
	 * not happen if we could get an igroup pointer under the xa_lock.
	 */
	if (WARN_ON(igroup->group != group))
		return false;
	return kref_get_unless_zero(&igroup->ref);
}

/*
 * iommufd needs to store some more data for each iommu_group, we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
					       struct device *dev)
{
	struct iommufd_group *new_igroup;
	struct iommufd_group *cur_igroup;
	struct iommufd_group *igroup;
	struct iommu_group *group;
	unsigned int id;

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	id = iommu_group_id(group);

	xa_lock(&ictx->groups);
	igroup = xa_load(&ictx->groups, id);
	if (iommufd_group_try_get(igroup, group)) {
		xa_unlock(&ictx->groups);
		iommu_group_put(group);
		return igroup;
	}
	xa_unlock(&ictx->groups);

	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
	if (!new_igroup) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&new_igroup->ref);
	mutex_init(&new_igroup->lock);
	xa_init(&new_igroup->pasid_attach);
	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
	/* group reference moves into new_igroup */
	new_igroup->group = group;

	/*
	 * The ictx is not additionally refcounted here because all objects
	 * using an igroup must put it before their destroy completes.
	 */
	new_igroup->ictx = ictx;

	/*
	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
	 * value to assume for the xa_cmpxchg algorithm.
	 */
	cur_igroup = NULL;
	xa_lock(&ictx->groups);
	while (true) {
		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup,
				      new_igroup, GFP_KERNEL);
		if (xa_is_err(igroup)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return ERR_PTR(xa_err(igroup));
		}

		/* new_group was successfully installed */
		if (cur_igroup == igroup) {
			xa_unlock(&ictx->groups);
			return new_igroup;
		}

		/* Check again if the current group is any good */
		if (iommufd_group_try_get(igroup, group)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return igroup;
		}
		cur_igroup = igroup;
	}
}

static void iommufd_device_remove_vdev(struct iommufd_device *idev)
{
	struct iommufd_vdevice *vdev;

	mutex_lock(&idev->igroup->lock);
	/* prevent new references from vdev */
	idev->destroying = true;
	/* vdev has been completely destroyed by userspace */
	if (!idev->vdev)
		goto out_unlock;

	vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id);
	/*
	 * An ongoing vdev destroy ioctl has removed the vdev from the object
	 * xarray, but has not finished iommufd_vdevice_destroy() yet as it
	 * needs the same mutex. We exit the locking then wait on wait_cnt
	 * reference for the vdev destruction.
	 */
	if (IS_ERR(vdev))
		goto out_unlock;

	/* Should never happen */
	if (WARN_ON(vdev != idev->vdev)) {
		iommufd_put_object(idev->ictx, &vdev->obj);
		goto out_unlock;
	}

	/*
	 * vdev is still alive. Hold a users refcount to prevent racing with
	 * userspace destruction, then use iommufd_object_tombstone_user() to
	 * destroy it and leave a tombstone.
	 */
	refcount_inc(&vdev->obj.users);
	iommufd_put_object(idev->ictx, &vdev->obj);
	mutex_unlock(&idev->igroup->lock);
	iommufd_object_tombstone_user(idev->ictx, &vdev->obj);
	return;

out_unlock:
	mutex_unlock(&idev->igroup->lock);
}

void iommufd_device_pre_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	/* Release the wait_cnt reference on this */
	iommufd_device_remove_vdev(idev);
}

void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommufd_put_group(idev->igroup);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes an ownership over the device and returns
 * struct iommufd_device pointer, otherwise returns error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommufd_group *igroup;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for
	 * userspace to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	igroup = iommufd_get_group(ictx, dev);
	if (IS_ERR(igroup))
		return ERR_CAST(igroup);

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/Isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(dev) &&
	    !iommu_group_has_isolated_msi(igroup->group)) {
		if (!allow_unsafe_interrupts) {
			rc = -EPERM;
			goto out_group_put;
		}

		dev_warn(
			dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;

	/*
	 * If the caller fails after this success it must call
	 * iommufd_device_unbind() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other
	 * object can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommufd_put_group(igroup);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
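/*
 * A minimal usage sketch of the bind lifecycle above, assuming a hypothetical
 * caller ("my_dev") that has already set driver_managed_dma; the only real
 * APIs used are iommufd_device_bind() and iommufd_device_unbind():
 *
 *	struct iommufd_device *idev;
 *	u32 dev_id;
 *
 *	idev = iommufd_device_bind(ictx, my_dev, &dev_id);
 *	if (IS_ERR(idev))
 *		return PTR_ERR(idev);
 *	(dev_id is reported to userspace; idev feeds the attach APIs below)
 *	...
 *	iommufd_device_unbind(idev);
 */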
/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, ex. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
	struct iommufd_object *obj;
	unsigned long index;

	if (!ictx || !group)
		return false;

	xa_lock(&ictx->objects);
	xa_for_each(&ictx->objects, index, obj) {
		if (obj->type == IOMMUFD_OBJ_DEVICE &&
		    container_of(obj, struct iommufd_device, obj)
				    ->igroup->group == group) {
			xa_unlock(&ictx->objects);
			return true;
		}
	}
	xa_unlock(&ictx->objects);
	return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return back
 * to unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");

static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
					     ioasid_t pasid)
{
	struct iommufd_attach *attach;
	struct iommufd_device *idev;
	unsigned int count = 0;
	unsigned long index;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (attach)
		xa_for_each(&attach->device_array, index, idev)
			count++;
	return count;
}

#ifdef CONFIG_IRQ_MSI_IOMMU
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_ctx *ictx = igroup->ictx;
	struct iommufd_sw_msi_map *cur;

	if (igroup->sw_msi_start == PHYS_ADDR_MAX)
		return 0;

	/*
	 * Install all the MSI pages the device has been using into the domain
	 */
	guard(mutex)(&ictx->sw_msi_lock);
	list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
		int rc;

		if (cur->sw_msi_start != igroup->sw_msi_start ||
		    !test_bit(cur->id, igroup->required_sw_msi.bitmap))
			continue;

		rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
		if (rc)
			return rc;
	}
	return 0;
}
#else
static inline int
iommufd_group_setup_msi(struct iommufd_group *igroup,
			struct iommufd_hwpt_paging *hwpt_paging)
{
	return 0;
}
#endif

static bool iommufd_group_first_attach(struct iommufd_group *igroup,
				       ioasid_t pasid)
{
	lockdep_assert_held(&igroup->lock);
	return !xa_load(&igroup->pasid_attach, pasid);
}

static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
				    struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_group *igroup = idev->igroup;
	int rc;

	lockdep_assert_held(&igroup->lock);

	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
						 idev->dev,
						 &igroup->sw_msi_start);
	if (rc)
		return rc;

	if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
		rc = iommufd_group_setup_msi(igroup, hwpt_paging);
		if (rc) {
			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
						  idev->dev);
			return rc;
		}
	}
	return 0;
}

/* The device attach/detach/replace helpers for attach_handle */

static bool iommufd_device_is_attached(struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach *attach;

	attach = xa_load(&idev->igroup->pasid_attach, pasid);
	return xa_load(&attach->device_array, idev->obj.id);
}

static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev,
				     ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;

	lockdep_assert_held(&igroup->lock);

	if (pasid == IOMMU_NO_PASID) {
		unsigned long start = IOMMU_NO_PASID;

		if (!hwpt->pasid_compat &&
		    xa_find_after(&igroup->pasid_attach, &start, UINT_MAX,
				  XA_PRESENT))
			return -EINVAL;
	} else {
		struct iommufd_attach *attach;

		if (!hwpt->pasid_compat)
			return -EINVAL;

		attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
		if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
			return -EINVAL;
	}

	return 0;
}

static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt,
					   struct iommufd_device *idev)
{
	struct pci_dev *pdev;

	if (!hwpt->fault || !dev_is_pci(idev->dev))
		return true;

	/*
	 * Once we turn on PCI/PRI support for VF, the response failure code
	 * should not be forwarded to the hardware due to PRI being a shared
	 * resource between PF and VFs. There is no coordination for this
	 * shared capability. This waits for a vPRI reset to recover.
	 */
	pdev = to_pci_dev(idev->dev);

	return (!pdev->is_virtfn || !pci_pri_supported(pdev));
}

static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_device *idev,
				      ioasid_t pasid)
{
	struct iommufd_attach_handle *handle;
	int rc;

	if (!iommufd_hwpt_compatible_device(hwpt, idev))
		return -EINVAL;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	handle->idev = idev;
	if (pasid == IOMMU_NO_PASID)
		rc = iommu_attach_group_handle(hwpt->domain,
					       idev->igroup->group,
					       &handle->handle);
	else
		rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
					       &handle->handle);
	if (rc)
		goto out_free_handle;

	return 0;

out_free_handle:
	kfree(handle);
	return rc;
}

static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommu_attach_handle *handle;

	lockdep_assert_held(&idev->igroup->lock);

	handle = iommu_attach_handle_get(idev->igroup->group, pasid, 0);
	if (IS_ERR(handle))
		return NULL;
	return to_iommufd_handle(handle);
}

static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach_handle *handle;

	handle = iommufd_device_get_attach_handle(idev, pasid);
	if (pasid == IOMMU_NO_PASID)
		iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
	else
		iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);

	iommufd_auto_response_faults(hwpt, handle);
	kfree(handle);
}

static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
				       ioasid_t pasid,
				       struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle, *old_handle;
	int rc;

	if (!iommufd_hwpt_compatible_device(hwpt, idev))
		return -EINVAL;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	old_handle = iommufd_device_get_attach_handle(idev, pasid);

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	handle->idev = idev;
	if (pasid == IOMMU_NO_PASID)
		rc = iommu_replace_group_handle(idev->igroup->group,
						hwpt->domain, &handle->handle);
	else
		rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
						pasid, &handle->handle);
	if (rc)
		goto out_free_handle;

	iommufd_auto_response_faults(hwpt, old_handle);
	kfree(old_handle);

	return 0;

out_free_handle:
	kfree(handle);
	return rc;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	int rc;

	mutex_lock(&igroup->lock);

	attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL, XA_ZERO_ENTRY,
			    GFP_KERNEL);
	if (xa_is_err(attach)) {
		rc = xa_err(attach);
		goto err_unlock;
	}

	if (!attach) {
		attach = kzalloc(sizeof(*attach), GFP_KERNEL);
		if (!attach) {
			rc = -ENOMEM;
			goto err_release_pasid;
		}
		xa_init(&attach->device_array);
	}

	old_hwpt = attach->hwpt;

	rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
		       GFP_KERNEL);
	if (rc) {
		WARN_ON(rc == -EBUSY && !old_hwpt);
		goto err_free_attach;
	}

	if (old_hwpt && old_hwpt != hwpt) {
		rc = -EINVAL;
		goto err_release_devid;
	}

	if (attach_resv) {
		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
		if (rc)
			goto err_release_devid;
	}
	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the
	 * per-device reserved regions are only updated during individual
	 * device attachment.
	 */
	if (iommufd_group_first_attach(igroup, pasid)) {
		rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
		if (rc)
			goto err_unresv;
		attach->hwpt = hwpt;
		WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid,
					   attach, GFP_KERNEL)));
	}
	refcount_inc(&hwpt->obj.users);
	WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
				   idev, GFP_KERNEL)));
	mutex_unlock(&igroup->lock);
	return 0;

err_unresv:
	if (attach_resv)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
err_release_devid:
	xa_release(&attach->device_array, idev->obj.id);
err_free_attach:
	if (iommufd_group_first_attach(igroup, pasid))
		kfree(attach);
err_release_pasid:
	if (iommufd_group_first_attach(igroup, pasid))
		xa_release(&igroup->pasid_attach, pasid);
err_unlock:
	mutex_unlock(&igroup->lock);
	return rc;
}

struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;
	struct iommufd_attach *attach;

	mutex_lock(&igroup->lock);
	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach) {
		mutex_unlock(&igroup->lock);
		return NULL;
	}

	hwpt = attach->hwpt;
	hwpt_paging = find_hwpt_paging(hwpt);

	xa_erase(&attach->device_array, idev->obj.id);
	if (xa_empty(&attach->device_array)) {
		iommufd_hwpt_detach_device(hwpt, idev, pasid);
		xa_erase(&igroup->pasid_attach, pasid);
		kfree(attach);
	}
	if (hwpt_paging && pasid == IOMMU_NO_PASID)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
	mutex_unlock(&igroup->lock);

	/* Caller must destroy hwpt */
	return hwpt;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
			 struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
	if (rc)
		return ERR_PTR(rc);
	return NULL;
}

static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_attach *attach;
	struct iommufd_device *cur;
	unsigned long index;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
	xa_for_each(&attach->device_array, index, cur)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}

static int
iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
				       struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_attach *attach;
	struct iommufd_device *cur;
	unsigned long index;
	int rc;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
	old_hwpt_paging = find_hwpt_paging(attach->hwpt);
	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
		xa_for_each(&attach->device_array, index, cur) {
			rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, cur->dev, NULL);
			if (rc)
				goto err_unresv;
		}
	}

	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
	if (rc)
		goto err_unresv;
	return 0;

err_unresv:
	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
	return rc;
}
static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
			  struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	unsigned int num_devices;
	int rc;

	mutex_lock(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach) {
		rc = -EINVAL;
		goto err_unlock;
	}

	old_hwpt = attach->hwpt;

	WARN_ON(!old_hwpt || xa_empty(&attach->device_array));

	if (!iommufd_device_is_attached(idev, pasid)) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt == old_hwpt) {
		mutex_unlock(&igroup->lock);
		return NULL;
	}

	if (attach_resv) {
		rc = iommufd_group_do_replace_reserved_iova(igroup,
							    hwpt_paging);
		if (rc)
			goto err_unlock;
	}

	rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

	old_hwpt_paging = find_hwpt_paging(old_hwpt);
	if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);

	attach->hwpt = hwpt;

	num_devices = iommufd_group_device_num(igroup, pasid);
	/*
	 * Move the refcounts held by the device_array to the new hwpt. Retain
	 * a refcount for this thread as the caller will free it.
	 */
	refcount_add(num_devices, &hwpt->obj.users);
	if (num_devices > 1)
		WARN_ON(refcount_sub_and_test(num_devices - 1,
					      &old_hwpt->obj.users));
	mutex_unlock(&igroup->lock);

	/* Caller must destroy old_hwpt */
	return old_hwpt;
err_unresv:
	if (attach_resv)
		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
	mutex_unlock(&igroup->lock);
	return ERR_PTR(rc);
}

typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev, ioasid_t pasid,
	struct iommufd_hw_pagetable *hwpt);

/*
 * When automatically managing the domains we search for a compatible domain
 * in the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
			       struct iommufd_ioas *ioas, u32 *pt_id,
			       attach_fn do_attach)
{
	/*
	 * iommufd_hw_pagetable_attach() is called by
	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
	 * to use the immediate_attach path as it supports drivers that can't
	 * directly allocate a domain.
	 */
	bool immediate_attach = do_attach == iommufd_device_do_attach;
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;

	/*
	 * There is no differentiation when domains are allocated, so any
	 * domain that is willing to attach to the device is interchangeable
	 * with any other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt_paging->auto_domain)
			continue;

		hwpt = &hwpt_paging->common;
		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt)) {
			iommufd_put_object(idev->ictx, &hwpt->obj);
			/*
			 * -EINVAL means the domain is incompatible with the
			 * device. Other error codes should propagate to
			 * userspace as failure. Success means the domain is
			 * attached.
			 */
			if (PTR_ERR(destroy_hwpt) == -EINVAL)
				continue;
			goto out_unlock;
		}
		*pt_id = hwpt->obj.id;
		iommufd_put_object(idev->ictx, &hwpt->obj);
		goto out_unlock;
	}

	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
						0, immediate_attach, NULL);
	if (IS_ERR(hwpt_paging)) {
		destroy_hwpt = ERR_CAST(hwpt_paging);
		goto out_unlock;
	}
	hwpt = &hwpt_paging->common;

	if (!immediate_attach) {
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_abort;
	} else {
		destroy_hwpt = NULL;
	}

	hwpt_paging->auto_domain = true;
	*pt_id = hwpt->obj.id;

	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;

out_abort:
	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;
}

static int iommufd_device_change_pt(struct iommufd_device *idev,
				    ioasid_t pasid, u32 *pt_id,
				    attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid,
							      ioas, pt_id,
							      do_attach);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		goto out_put_pt_obj;
	}
	iommufd_put_object(idev->ictx, pt_obj);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;

out_put_pt_obj:
	iommufd_put_object(idev->ictx, pt_obj);
	return PTR_ERR(destroy_hwpt);
}

/**
 * iommufd_device_attach - Connect a device/pasid to an iommu_domain
 * @idev: device to attach
 * @pasid: pasid to attach
 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This connects the device/pasid to an iommu_domain, either automatically
 * or manually selected. Once this completes the device could do DMA with
 * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
			  u32 *pt_id)
{
	int rc;

	rc = iommufd_device_change_pt(idev, pasid, pt_id,
				      &iommufd_device_do_attach);
	if (rc)
		return rc;

	/*
	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
	 * to destroy a device with an attachment.
	 */
	refcount_inc(&idev->obj.users);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
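/*
 * A sketch of the attach flow, assuming a bound idev and an IOAS ID coming
 * from userspace (hypothetical variable names). On input pt_id names the
 * IOAS; on success it is overwritten with the auto-created HWPT_PAGING ID:
 *
 *	u32 pt_id = ioas_id;
 *	int rc;
 *
 *	rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id);
 *	if (rc)
 *		return rc;
 */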
/**
 * iommufd_device_replace - Change the device/pasid's iommu_domain
 * @idev: device to change
 * @pasid: pasid to change
 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded. @pasid is
 * IOMMU_NO_PASID for no pasid usage.
 */
int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
			   u32 *pt_id)
{
	return iommufd_device_change_pt(idev, pasid, pt_id,
					&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");

/**
 * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
 * @idev: device to detach
 * @pasid: pasid to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns back to a blocked DMA translation.
 * @pasid is IOMMU_NO_PASID for no pasid usage.
 */
void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hw_pagetable *hwpt;

	hwpt = iommufd_hw_pagetable_detach(idev, pasid);
	if (!hwpt)
		return;
	iommufd_hw_pagetable_put(idev->ictx, hwpt);
	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
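/*
 * Continuing the sketch above: the attachment can later be atomically
 * switched to another IOAS/HWPT and finally torn down in the reverse order
 * of setup (new_ioas_id is hypothetical):
 *
 *	u32 new_pt_id = new_ioas_id;
 *
 *	rc = iommufd_device_replace(idev, IOMMU_NO_PASID, &new_pt_id);
 *	...
 *	iommufd_device_detach(idev, IOMMU_NO_PASID);
 *	iommufd_device_unbind(idev);
 */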
/*
 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
				      struct iommufd_ioas *new_ioas)
{
	u32 iopt_access_list_id = access->iopt_access_list_id;
	struct iommufd_ioas *cur_ioas = access->ioas;
	int rc;

	lockdep_assert_held(&access->ioas_lock);

	/* We are racing with a concurrent detach, bail */
	if (cur_ioas != access->ioas_unpin)
		return -EBUSY;

	if (cur_ioas == new_ioas)
		return 0;

	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (new_ioas) {
		rc = iopt_add_access(&new_ioas->iopt, access);
		if (rc) {
			access->ioas = cur_ioas;
			return rc;
		}
		refcount_inc(&new_ioas->obj.users);
	}

	if (cur_ioas) {
		if (!iommufd_access_is_internal(access) &&
		    access->ops->unmap) {
			mutex_unlock(&access->ioas_lock);
			access->ops->unmap(access->data, 0, ULONG_MAX);
			mutex_lock(&access->ioas_lock);
		}
		iopt_remove_access(&cur_ioas->iopt, access,
				   iopt_access_list_id);
		refcount_dec(&cur_ioas->obj.users);
	}

	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;

	return 0;
}

static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
	int rc;

	if (IS_ERR(ioas))
		return PTR_ERR(ioas);
	rc = iommufd_access_change_ioas(access, ioas);
	iommufd_put_object(access->ictx, &ioas->obj);
	return rc;
}

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	mutex_lock(&access->ioas_lock);
	if (access->ioas)
		WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
	if (!iommufd_access_is_internal(access))
		iommufd_ctx_put(access->ictx);
}

static struct iommufd_access *__iommufd_access_create(struct iommufd_ctx *ictx)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	mutex_init(&access->ioas_lock);
	return access;
}

struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx)
{
	struct iommufd_access *access;

	access = __iommufd_access_create(ictx);
	if (IS_ERR(access))
		return access;
	access->iova_alignment = PAGE_SIZE;

	iommufd_object_finalize(ictx, &access->obj);
	return access;
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data,
		      u32 *id)
{
	struct iommufd_access *access;

	access = __iommufd_access_create(ictx);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");

void iommufd_access_detach(struct iommufd_access *access)
{
	mutex_lock(&access->ioas_lock);
	if (WARN_ON(!access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");

int iommufd_access_attach_internal(struct iommufd_access *access,
				   struct iommufd_ioas *ioas)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	rc = iommufd_access_change_ioas(access, ioas);
	mutex_unlock(&access->ioas_lock);
	return rc;
}

int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
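/*
 * A minimal access-user sketch, assuming a driver-defined "my_access_ops"
 * (with an unmap callback if pinning will be used) and "my_data"; both names
 * are hypothetical:
 *
 *	struct iommufd_access *access;
 *	u32 access_id;
 *	int rc;
 *
 *	access = iommufd_access_create(ictx, &my_access_ops, my_data,
 *				       &access_id);
 *	if (IS_ERR(access))
 *		return PTR_ERR(access);
 *	rc = iommufd_access_attach(access, ioas_id);
 *	...
 *	iommufd_access_destroy(access);
 */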
/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has
 * attached a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap to complete while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj) ||
		    iommufd_access_is_internal(access))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(access->ictx, &access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Return the struct page's. The caller must stop accessing them before calling
 * this. The iova/length must exactly match the one provided to access_pages.
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	bool internal = iommufd_access_is_internal(access);
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	mutex_lock(&access->ioas_lock);
	/*
	 * The driver must be doing something wrong if it calls this before an
	 * iommufd_access_attach() or after an iommufd_access_detach().
	 */
	if (WARN_ON(!access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	iopt = &access->ioas_unpin->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))),
			internal);
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area,
				  iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page *
 * pointers. These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	bool internal = iommufd_access_is_internal(access);
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE ||
		    (!internal && !access->ops->unmap)))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags, internal);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area,
					min(last_iova,
					    iopt_area_last_iova(area))),
				internal);
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
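/*
 * Pin and unpin must be exactly paired on the same iova/length. A sketch,
 * assuming a PAGE_SIZE aligned iova and a pages[] array sized to match:
 *
 *	struct page *pages[16];
 *	int rc;
 *
 *	rc = iommufd_access_pin_pages(access, iova, 16 * PAGE_SIZE, pages,
 *				      IOMMUFD_ACCESS_RW_WRITE);
 *	if (rc)
 *		return rc;
 *	... (access the memory through pages[]) ...
 *	iommufd_access_unpin_pages(access, iova, 16 * PAGE_SIZE);
 */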
/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel to/from data into the range given by IOVA/length. If flags
 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
 * by changing it into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc = -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
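/*
 * For simple copies the pin/unpin pair can be avoided entirely. A sketch,
 * assuming "buf" is a kernel buffer of "len" bytes; reads pass
 * IOMMUFD_ACCESS_RW_READ (0), writes pass IOMMUFD_ACCESS_RW_WRITE, and
 * kthread callers may add IOMMUFD_ACCESS_RW_KTHREAD as described above:
 *
 *	rc = iommufd_access_rw(access, iova, buf, len,
 *			       IOMMUFD_ACCESS_RW_READ);
 */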
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
	const u32 SUPPORTED_FLAGS = IOMMU_HW_INFO_FLAG_INPUT_TYPE;
	struct iommu_hw_info *cmd = ucmd->cmd;
	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
	const struct iommu_ops *ops;
	struct iommufd_device *idev;
	unsigned int data_len;
	unsigned int copy_len;
	void *data;
	int rc;

	if (cmd->flags & ~SUPPORTED_FLAGS)
		return -EOPNOTSUPP;
	if (cmd->__reserved[0] || cmd->__reserved[1] || cmd->__reserved[2])
		return -EOPNOTSUPP;

	/* Clear the type field since drivers don't support a random input */
	if (!(cmd->flags & IOMMU_HW_INFO_FLAG_INPUT_TYPE))
		cmd->in_data_type = IOMMU_HW_INFO_TYPE_DEFAULT;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (ops->hw_info) {
		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
		if (IS_ERR(data)) {
			rc = PTR_ERR(data);
			goto out_put;
		}

		/*
		 * drivers that have hw_info callback should have a unique
		 * iommu_hw_info_type.
		 */
		if (WARN_ON_ONCE(cmd->out_data_type ==
				 IOMMU_HW_INFO_TYPE_NONE)) {
			rc = -EOPNOTSUPP;
			goto out_free;
		}
	} else {
		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
		data_len = 0;
		data = NULL;
	}

	copy_len = min(cmd->data_len, data_len);
	if (copy_to_user(user_ptr, data, copy_len)) {
		rc = -EFAULT;
		goto out_free;
	}

	/*
	 * Zero the trailing bytes if the user buffer is bigger than the
	 * data size kernel actually has.
	 */
	if (copy_len < cmd->data_len) {
		if (clear_user(user_ptr + copy_len,
			       cmd->data_len - copy_len)) {
			rc = -EFAULT;
			goto out_free;
		}
	}

	/*
	 * We return the length the kernel supports so userspace may know what
	 * the kernel capability is. It could be larger than the input buffer.
	 */
	cmd->data_len = data_len;

	cmd->out_capabilities = 0;
	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;

	cmd->out_max_pasid_log2 = 0;
	/*
	 * Currently, all iommu drivers enable PASID in the probe_device()
	 * op if iommu and device supports it. So the max_pasids stored in
	 * dev->iommu indicates both PASID support and enable status. A
	 * non-zero dev->iommu->max_pasids means PASID is supported and
	 * enabled. The iommufd only reports PASID capability to userspace
	 * if it's enabled.
	 */
	if (idev->dev->iommu->max_pasids) {
		cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);

		if (dev_is_pci(idev->dev)) {
			struct pci_dev *pdev = to_pci_dev(idev->dev);
			int ctrl;

			ctrl = pci_pasid_status(pdev);

			WARN_ON_ONCE(ctrl < 0 ||
				     !(ctrl & PCI_PASID_CTRL_ENABLE));

			if (ctrl & PCI_PASID_CTRL_EXEC)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_EXEC;
			if (ctrl & PCI_PASID_CTRL_PRIV)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_PRIV;
		}
	}

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
	kfree(data);
out_put:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}
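/*
 * Userspace reaches iommufd_get_hw_info() through the IOMMU_GET_HW_INFO ioctl
 * on the iommufd file descriptor. A rough caller-side sketch (field names per
 * include/uapi/linux/iommufd.h; info_buf sizing is the caller's choice):
 *
 *	struct iommu_hw_info cmd = {
 *		.size = sizeof(cmd),
 *		.dev_id = dev_id,
 *		.data_len = sizeof(info_buf),
 *		.data_uptr = (uintptr_t)&info_buf,
 *	};
 *
 *	if (!ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
 *		parse info_buf according to cmd.out_data_type and check
 *		cmd.out_capabilities for IOMMU_HW_CAP_* bits.
 */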
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/f2fs/segment.h
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/blkdev.h>
#include <linux/backing-dev.h>

/* constant macro */
#define NULL_SEGNO			((unsigned int)(~0))
#define NULL_SECNO			((unsigned int)(~0))

#define DEF_RECLAIM_PREFREE_SEGMENTS	5	/* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS	4096	/* 8GB in maximum */

#define F2FS_MIN_SEGMENTS	9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
#define F2FS_MIN_META_SEGMENTS	8 /* SB + 2 (CP + SIT + NAT) + SSA */

#define INVALID_MTIME	ULLONG_MAX /* no valid blocks in a segment/section */

/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno)	((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)

#define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))

static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
						unsigned short seg_type)
{
	f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
}

#define MAIN_BLKADDR(sbi)						\
	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr :				\
		le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
#define SEG0_BLKADDR(sbi)						\
	(SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr :				\
		le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))

#define MAIN_SEGS(sbi)	(SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi)	((sbi)->total_sections)

#define TOTAL_SEGS(sbi)							\
	(SM_I(sbi) ? SM_I(sbi)->segment_count :				\
		le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
#define TOTAL_BLKS(sbi)	(SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi)))

#define MAX_BLKADDR(sbi)	(SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi)	(1ULL << ((sbi)->log_blocksize +	\
					(sbi)->log_blocks_per_seg))

#define START_BLOCK(sbi, segno)	(SEG0_BLKADDR(sbi) +			\
	 (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno))))

#define NEXT_FREE_BLKADDR(sbi, curseg)					\
	(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)

#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr)	((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr)				\
	(BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr)))
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr)				\
	(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1))

#define GET_SEGNO(sbi, blk_addr)					\
	((!__is_valid_data_blkaddr(blk_addr)) ?				\
	NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),				\
		GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define CAP_BLKS_PER_SEC(sbi)						\
	(BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec)
#define CAP_SEGS_PER_SEC(sbi)						\
	(SEGS_PER_SEC(sbi) -						\
	BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec))
#define GET_START_SEG_FROM_SEC(sbi, segno)				\
	(rounddown(segno, SEGS_PER_SEC(sbi)))
#define GET_SEC_FROM_SEG(sbi, segno)					\
	(((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi))
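/*
 * A worked example of the arithmetic above, assuming the common geometry of
 * 4KB blocks and 2MB segments (512 blocks per segment) with
 * SEGS_PER_SEC(sbi) == 1: a block address 1536 blocks past SEG0_BLKADDR(sbi)
 * gives GET_SEGNO_FROM_SEG0() == 1536 / 512 == 3 and
 * GET_BLKOFF_FROM_SEG0() == 1536 & 511 == 0, i.e. the first block of the
 * fourth segment, which is also section #3 per GET_SEC_FROM_SEG().
 */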
#define GET_SEG_FROM_SEC(sbi, secno)					\
	((secno) * SEGS_PER_SEC(sbi))
#define GET_ZONE_FROM_SEC(sbi, secno)					\
	(((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno)					\
	GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))

#define GET_SUM_BLOCK(sbi, segno)					\
	((sbi)->sm_info->ssa_blkaddr + (segno))

#define GET_SUM_TYPE(footer) ((footer)->entry_type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type))

#define SIT_ENTRY_OFFSET(sit_i, segno)					\
	((segno) % (sit_i)->sents_per_block)
#define SIT_BLOCK_OFFSET(segno)						\
	((segno) / SIT_ENTRY_PER_BLOCK)
#define	START_SEGNO(segno)						\
	(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi)						\
	DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr)						\
	(BITS_TO_LONGS(nr) * sizeof(unsigned long))

#define SECTOR_FROM_BLOCK(blk_addr)					\
	(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors)					\
	((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
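/*
 * A worked example for the SIT lookup macros above, assuming 4KB blocks where
 * SIT_ENTRY_PER_BLOCK works out to 55 entries: segment #123 maps to
 * SIT_BLOCK_OFFSET(123) == 123 / 55 == 2, that SIT block covers segments from
 * START_SEGNO(123) == 2 * 55 == 110, and the in-block slot is
 * 123 % 55 == 13 (SIT_ENTRY_OFFSET with sents_per_block == 55).
 */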
*/ unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */ unsigned char *discard_map; unsigned long long mtime; /* modification time of the segment */ }; struct sec_entry { unsigned int valid_blocks; /* # of valid blocks in a section */ unsigned int ckpt_valid_blocks; /* # of valid blocks last cp in a section */ }; #define MAX_SKIP_GC_COUNT 16 struct revoke_entry { struct list_head list; block_t old_addr; /* for revoking when fail to commit */ pgoff_t index; }; struct sit_info { block_t sit_base_addr; /* start block address of SIT area */ block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ char *bitmap; /* all bitmaps pointer */ char *sit_bitmap; /* SIT bitmap pointer */ #ifdef CONFIG_F2FS_CHECK_FS char *sit_bitmap_mir; /* SIT bitmap mirror */ /* bitmap of segments to be ignored by GC in case of errors */ unsigned long *invalid_segmap; #endif unsigned int bitmap_size; /* SIT bitmap size */ unsigned long *tmp_map; /* bitmap for temporal use */ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ struct rw_semaphore sentry_lock; /* to protect SIT cache */ struct seg_entry *sentries; /* SIT segment-level cache */ struct sec_entry *sec_entries; /* SIT section-level cache */ /* for cost-benefit algorithm in cleaning procedure */ unsigned long long elapsed_time; /* elapsed time after mount */ unsigned long long mounted_time; /* mount time */ unsigned long long min_mtime; /* min. modification time */ unsigned long long max_mtime; /* max. modification time */ unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */ unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */ unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */ }; struct free_segmap_info { unsigned int start_segno; /* start segment number logically */ unsigned int free_segments; /* # of free segments */ unsigned int free_sections; /* # of free sections */ spinlock_t segmap_lock; /* free segmap lock */ unsigned long *free_segmap; /* free segment bitmap */ unsigned long *free_secmap; /* free section bitmap */ }; /* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ enum dirty_type { DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ DIRTY, /* to count # of dirty segments */ PRE, /* to count # of entirely obsolete segments */ NR_DIRTY_TYPE }; struct dirty_seglist_info { unsigned long *dirty_segmap[NR_DIRTY_TYPE]; unsigned long *dirty_secmap; struct mutex seglist_lock; /* lock for segment bitmaps */ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ unsigned long *victim_secmap; /* background GC victims */ unsigned long *pinned_secmap; /* pinned victims from foreground GC */ unsigned int pinned_secmap_cnt; /* count of victims which has pinned data */ bool enable_pin_section; /* enable pinning section */ }; /* for active log information */ struct curseg_info { struct mutex curseg_mutex; /* lock for consistency */ struct f2fs_summary_block *sum_blk; /* cached summary block */ struct rw_semaphore journal_rwsem; /* protect journal area 
*/ struct f2fs_journal *journal; /* cached journal info */ unsigned char alloc_type; /* current allocation type */ unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */ unsigned int segno; /* current segment number */ unsigned short next_blkoff; /* next block offset to write */ unsigned int zone; /* current zone number */ unsigned int next_segno; /* preallocated segment */ int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */ bool inited; /* indicate inmem log is inited */ }; struct sit_entry_set { struct list_head set_list; /* link with all sit sets */ unsigned int start_segno; /* start segno of sits in set */ unsigned int entry_cnt; /* the # of sit entries in set */ }; /* * inline functions */ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) { return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno) { int i; for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { if (segno == CURSEG_I(sbi, i)->segno) return true; } return false; } static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno) { int i; for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno)) return true; } return false; } static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); return &sit_i->sentries[segno]; } static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)]; } static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { /* * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. 
*/ if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; } static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->ckpt_valid_blocks; else return get_seg_entry(sbi, segno)->ckpt_valid_blocks; } static inline void set_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int blocks = 0; int i; for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { struct seg_entry *se = get_seg_entry(sbi, start_segno); blocks += se->ckpt_valid_blocks; } get_sec_entry(sbi, segno)->ckpt_valid_blocks = blocks; } #ifdef CONFIG_F2FS_CHECK_FS static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int blocks = 0; int i; for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { struct seg_entry *se = get_seg_entry(sbi, start_segno); blocks += se->ckpt_valid_blocks; } if (blocks != get_sec_entry(sbi, segno)->ckpt_valid_blocks) { f2fs_err(sbi, "Inconsistent ckpt valid blocks: " "seg entry(%d) vs sec entry(%d) at secno %d", blocks, get_sec_entry(sbi, segno)->ckpt_valid_blocks, secno); f2fs_bug_on(sbi, 1); } } #else static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno) { } #endif static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { se->valid_blocks = GET_SIT_VBLOCKS(rs); se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); #ifdef CONFIG_F2FS_CHECK_FS memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE); #endif se->type = GET_SIT_TYPE(rs); se->mtime = le64_to_cpu(rs->mtime); } static inline void __seg_info_to_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | se->valid_blocks; rs->vblocks = cpu_to_le16(raw_vblocks); memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); rs->mtime = cpu_to_le64(se->mtime); } static inline void seg_info_to_sit_folio(struct f2fs_sb_info *sbi, struct folio *folio, unsigned int start) { struct f2fs_sit_block *raw_sit; struct seg_entry *se; struct f2fs_sit_entry *rs; unsigned int end = min(start + SIT_ENTRY_PER_BLOCK, (unsigned long)MAIN_SEGS(sbi)); int i; raw_sit = folio_address(folio); memset(raw_sit, 0, PAGE_SIZE); for (i = 0; i < end - start; i++) { rs = &raw_sit->entries[i]; se = get_seg_entry(sbi, start + i); __seg_info_to_raw_sit(se, rs); } } static inline void seg_info_to_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { __seg_info_to_raw_sit(se, rs); memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); se->ckpt_valid_blocks = se->valid_blocks; } static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, unsigned int max, unsigned int segno) { unsigned int ret; spin_lock(&free_i->segmap_lock); ret = find_next_bit(free_i->free_segmap, max, segno); spin_unlock(&free_i->segmap_lock); return ret; } static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = GET_SEC_FROM_SEG(sbi, 
segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } spin_unlock(&free_i->segmap_lock); } static inline void __set_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); set_bit(segno, free_i->free_segmap); free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int segno, bool inmem) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; bool ret; spin_lock(&free_i->segmap_lock); ret = test_and_clear_bit(segno, free_i->free_segmap); if (!ret) goto unlock_out; free_i->free_segments++; if (!inmem && is_cursec(sbi, secno)) goto unlock_out; /* check large section */ next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) goto unlock_out; ret = test_and_clear_bit(secno, free_i->free_secmap); if (!ret) goto unlock_out; free_i->free_sections++; if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) sbi->next_victim_seg[BG_GC] = NULL_SEGNO; if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) sbi->next_victim_seg[FG_GC] = NULL_SEGNO; unlock_out: spin_unlock(&free_i->segmap_lock); } static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } spin_unlock(&free_i->segmap_lock); } static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, void *dst_addr) { struct sit_info *sit_i = SIT_I(sbi); #ifdef CONFIG_F2FS_CHECK_FS if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir, sit_i->bitmap_size)) f2fs_bug_on(sbi, 1); #endif memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); } static inline block_t written_block_count(struct f2fs_sb_info *sbi) { return SIT_I(sbi)->written_valid_blocks; } static inline unsigned int free_segments(struct f2fs_sb_info *sbi) { return FREE_I(sbi)->free_segments; } static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { return SM_I(sbi)->reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) { return FREE_I(sbi)->free_sections; } static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) { return DIRTY_I(sbi)->nr_dirty[PRE]; } static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) { return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; } static inline int overprovision_segments(struct f2fs_sb_info *sbi) { return SM_I(sbi)->ovp_segments; } static inline int reserved_sections(struct f2fs_sb_info *sbi) { return 
GET_SEC_FROM_SEG(sbi, reserved_segments(sbi)); } static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi, enum log_type type, unsigned int segno) { if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) return CAP_BLKS_PER_SEC(sbi) - SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) - CURSEG_I(sbi, type)->next_blkoff; return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true); } static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, unsigned int node_blocks, unsigned int data_blocks, unsigned int dent_blocks) { unsigned int segno, left_blocks, blocks; int i; /* check current data/node sections in the worst case. */ for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) { segno = CURSEG_I(sbi, i)->segno; if (unlikely(segno == NULL_SEGNO)) return false; left_blocks = get_left_section_blocks(sbi, i, segno); blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks; if (blocks > left_blocks) return false; } /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; if (unlikely(segno == NULL_SEGNO)) return false; left_blocks = get_left_section_blocks(sbi, CURSEG_HOT_DATA, segno); if (dent_blocks > left_blocks) return false; return true; } /* * calculate needed sections for dirty node/dentry and call * has_curseg_enough_space, please note that, it needs to account * dirty data as well in lfs mode when checkpoint is disabled. */ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) { unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); unsigned int total_data_blocks = 0; unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int data_secs = 0; unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int data_blocks = 0; if (f2fs_lfs_mode(sbi)) { total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); } if (lower_p) *lower_p = node_secs + dent_secs + data_secs; if (upper_p) *upper_p = node_secs + dent_secs + data_secs + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + (data_blocks ? 
1 : 0); if (curseg_p) *curseg_p = has_curseg_enough_space(sbi, node_blocks, data_blocks, dent_blocks); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { unsigned int free_secs, lower_secs, upper_secs; bool curseg_space; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; __get_secs_required(sbi, &lower_secs, &upper_secs, &curseg_space); free_secs = free_sections(sbi) + freed; lower_secs += needed + reserved_sections(sbi); upper_secs += needed + reserved_sections(sbi); if (free_secs > upper_secs) return false; if (free_secs <= lower_secs) return true; return !curseg_space; } static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { return !has_not_enough_free_secs(sbi, freed, needed); } static inline bool has_enough_free_blks(struct f2fs_sb_info *sbi) { unsigned int total_free_blocks = 0; unsigned int avail_user_block_count; spin_lock(&sbi->stat_lock); avail_user_block_count = get_available_block_count(sbi, NULL, true); total_free_blocks = avail_user_block_count - (unsigned int)valid_user_blocks(sbi); spin_unlock(&sbi->stat_lock); return total_free_blocks > 0; } static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (likely(has_enough_free_secs(sbi, 0, 0))) return true; if (!f2fs_lfs_mode(sbi) && likely(has_enough_free_blks(sbi))) return true; return false; } static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; } static inline int utilization(struct f2fs_sb_info *sbi) { return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); } /* * Sometimes it is better for f2fs to drop the out-of-place update policy. * Users can control the policy through sysfs entries. * The policies and their triggering conditions are as follows. * F2FS_IPU_FORCE - all the time, * F2FS_IPU_SSR - if SSR mode is activated, * F2FS_IPU_UTIL - if FS utilization is over the threshold, * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over * the threshold, * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash * storages. IPU will be triggered only if the # of dirty * pages exceeds min_fsync_blocks. (=default option) * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has * FI_OPU_WRITE flag. * F2FS_IPU_DISABLE - disable IPU.
(=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 #define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ #define F2FS_IPU_DISABLE 0 /* Modification on enum should be synchronized with ipu_mode_names array */ enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE, F2FS_IPU_HONOR_OPU_WRITE, F2FS_IPU_MAX, }; static inline bool IS_F2FS_IPU_DISABLE(struct f2fs_sb_info *sbi) { return SM_I(sbi)->ipu_policy == F2FS_IPU_DISABLE; } #define F2FS_IPU_POLICY(name) \ static inline bool IS_##name(struct f2fs_sb_info *sbi) \ { \ return SM_I(sbi)->ipu_policy & BIT(name); \ } F2FS_IPU_POLICY(F2FS_IPU_FORCE); F2FS_IPU_POLICY(F2FS_IPU_SSR); F2FS_IPU_POLICY(F2FS_IPU_UTIL); F2FS_IPU_POLICY(F2FS_IPU_SSR_UTIL); F2FS_IPU_POLICY(F2FS_IPU_FSYNC); F2FS_IPU_POLICY(F2FS_IPU_ASYNC); F2FS_IPU_POLICY(F2FS_IPU_NOCACHE); F2FS_IPU_POLICY(F2FS_IPU_HONOR_OPU_WRITE); static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); return curseg->segno; } static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); return curseg->alloc_type; } static inline bool valid_main_segno(struct f2fs_sb_info *sbi, unsigned int segno) { return segno <= (MAIN_SEGS(sbi) - 1); } static inline void verify_fio_blkaddr(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; if (__is_valid_data_blkaddr(fio->old_blkaddr)) verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC); verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE); } /* * Summary block is always treated as an invalid block */ static inline int check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; int cur_pos = 0, next_pos; unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno); /* check bitmap with valid block count */ do { if (is_valid) { next_pos = find_next_zero_bit_le(&raw_sit->valid_map, usable_blks_per_seg, cur_pos); valid_blocks += next_pos - cur_pos; } else next_pos = find_next_bit_le(&raw_sit->valid_map, usable_blks_per_seg, cur_pos); cur_pos = next_pos; is_valid = !is_valid; } while (cur_pos < usable_blks_per_seg); if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { f2fs_err(sbi, "Mismatch valid blocks %d vs. 
%d", GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT); return -EFSCORRUPTED; } if (usable_blks_per_seg < BLKS_PER_SEG(sbi)) f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map, BLKS_PER_SEG(sbi), usable_blks_per_seg) != BLKS_PER_SEG(sbi)); /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg || !valid_main_segno(sbi, segno))) { f2fs_err(sbi, "Wrong valid blocks %d or segno %u", GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT); return -EFSCORRUPTED; } return 0; } static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); unsigned int offset = SIT_BLOCK_OFFSET(start); block_t blk_addr = sit_i->sit_base_addr + offset; f2fs_bug_on(sbi, !valid_main_segno(sbi, start)); #ifdef CONFIG_F2FS_CHECK_FS if (f2fs_test_bit(offset, sit_i->sit_bitmap) != f2fs_test_bit(offset, sit_i->sit_bitmap_mir)) f2fs_bug_on(sbi, 1); #endif /* calculate sit block address */ if (f2fs_test_bit(offset, sit_i->sit_bitmap)) blk_addr += sit_i->sit_blocks; return blk_addr; } static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, pgoff_t block_addr) { struct sit_info *sit_i = SIT_I(sbi); block_addr -= sit_i->sit_base_addr; if (block_addr < sit_i->sit_blocks) block_addr += sit_i->sit_blocks; else block_addr -= sit_i->sit_blocks; return block_addr + sit_i->sit_base_addr; } static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); f2fs_change_bit(block_off, sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS f2fs_change_bit(block_off, sit_i->sit_bitmap_mir); #endif } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, bool base_time) { struct sit_info *sit_i = SIT_I(sbi); time64_t diff, now = ktime_get_boottime_seconds(); if (now >= sit_i->mounted_time) return sit_i->elapsed_time + now - sit_i->mounted_time; /* system time is set to the past */ if (!base_time) { diff = sit_i->mounted_time - now; if (sit_i->elapsed_time >= diff) return sit_i->elapsed_time - diff; return 0; } return sit_i->elapsed_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, unsigned int ofs_in_node, unsigned char version) { sum->nid = cpu_to_le32(nid); sum->ofs_in_node = cpu_to_le16(ofs_in_node); sum->version = version; } static inline block_t start_sum_block(struct f2fs_sb_info *sbi) { return __start_cp_addr(sbi) + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) { return __start_cp_addr(sbi) + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) - (base + 1) + type; } static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno)) return true; return false; } /* * It is very important to gather dirty pages and write at once, so that we can * submit a big bio without interfering other data writes. * By default, 512 pages for directory data, * 512 pages (2MB) * 8 for nodes, and * 256 pages * 8 for meta are set. 
*/ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { if (sbi->sb->s_bdi->wb.dirty_exceeded) return 0; if (type == DATA) return BLKS_PER_SEG(sbi); else if (type == NODE) return SEGS_TO_BLKS(sbi, 8); else if (type == META) return 8 * BIO_MAX_VECS; else return 0; } /* * When writing pages, it is better to align nr_to_write to the segment size. */ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, struct writeback_control *wbc) { long nr_to_write, desired; if (wbc->sync_mode != WB_SYNC_NONE) return 0; nr_to_write = wbc->nr_to_write; desired = BIO_MAX_VECS; if (type == NODE) desired <<= 1; wbc->nr_to_write = desired; return desired - nr_to_write; } static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; bool wakeup = false; int i; if (force) goto wake_up; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (i + 1 < dcc->discard_granularity) break; if (!list_empty(&dcc->pend_list[i])) { wakeup = true; break; } } mutex_unlock(&dcc->cmd_lock); if (!wakeup || !is_idle(sbi, DISCARD_TIME)) return; wake_up: dcc->discard_wake = true; wake_up_interruptible_all(&dcc->discard_wait_queue); }
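The segment/section address macros above are pure power-of-two arithmetic. The following standalone userspace sketch (not f2fs code; all demo_* names and the geometry constants are invented for illustration) shows the same block-address-to-(segment, offset) mapping that GET_SEGNO_FROM_SEG0, GET_BLKOFF_FROM_SEG0 and GET_L2R_SEGNO encode, assuming a hypothetical 512-blocks-per-segment layout:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_SEG0_BLKADDR	512u	/* assumed start of segment 0 */
#define DEMO_BLKS_PER_SEG	512u	/* assumed; must be a power of two */
#define DEMO_MAIN_START_SEG	8u	/* assumed first segment of main area */

/* Analogue of GET_SEGNO_FROM_SEG0 + GET_BLKOFF_FROM_SEG0 */
static void demo_locate(uint32_t blk_addr, uint32_t *segno, uint32_t *blkoff)
{
	uint32_t off = blk_addr - DEMO_SEG0_BLKADDR;

	*segno = off / DEMO_BLKS_PER_SEG;
	/* same power-of-two mask trick as GET_BLKOFF_FROM_SEG0 */
	*blkoff = off & (DEMO_BLKS_PER_SEG - 1);
}

int main(void)
{
	uint32_t segno, blkoff;

	demo_locate(DEMO_SEG0_BLKADDR + 10 * DEMO_BLKS_PER_SEG + 17,
		    &segno, &blkoff);
	assert(segno == 10 && blkoff == 17);
	/* GET_L2R_SEGNO analogue: make it relative to the main area */
	printf("relative segno=%u, blkoff=%u\n",
	       segno - DEMO_MAIN_START_SEG, blkoff);	/* prints 2, 17 */
	return 0;
}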
// SPDX-License-Identifier: GPL-2.0 /* * Parts of this file are * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> #include "nl80211.h" #include "core.h" #include "rdev-ops.h" static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, unsigned int link_id, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->stop_ap) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!wdev->links[link_id].ap.beacon_interval) return -ENOENT; err = rdev_stop_ap(rdev, dev, link_id); if (!err) { wdev->conn_owner_nlportid = 0; wdev->links[link_id].ap.beacon_interval = 0; memset(&wdev->links[link_id].ap.chandef, 0, sizeof(wdev->links[link_id].ap.chandef)); wdev->u.ap.ssid_len = 0; rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev, link_id); /* Should we apply the grace period during beaconing interface * shutdown also? */ cfg80211_sched_dfs_chan_update(rdev); } schedule_work(&cfg80211_disconnect_work); return err; } int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link_id, bool notify) { unsigned int link; int ret = 0; if (link_id >= 0) return ___cfg80211_stop_ap(rdev, dev, link_id, notify); for_each_valid_link(dev->ieee80211_ptr, link) { int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify); if (ret1) ret = ret1; /* try the next one also if one errored */ } return ret; }
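cfg80211_stop_ap() above uses a small but common error-aggregation pattern: a non-negative link_id stops one link, a negative one iterates every valid link, and a failure on one link does not short-circuit the loop; only the last error is reported. A minimal sketch of the same pattern, with hypothetical demo_* names and a fixed link count standing in for for_each_valid_link():

#include <stdio.h>

#define N_LINKS 4

/* Hypothetical per-link stop operation: fails on link 2 in this demo. */
static int demo_stop_link(unsigned int link)
{
	return link == 2 ? -1 : 0;
}

/* Mirrors cfg80211_stop_ap(): a negative link_id means "all links",
 * and an error on one link does not stop the loop. */
static int demo_stop(int link_id)
{
	unsigned int link;
	int ret = 0;

	if (link_id >= 0)
		return demo_stop_link(link_id);

	for (link = 0; link < N_LINKS; link++) {
		int ret1 = demo_stop_link(link);

		if (ret1)
			ret = ret1;	/* remember the failure, keep going */
	}
	return ret;
}

int main(void)
{
	printf("stop all -> %d\n", demo_stop(-1));	/* -1: link 2 failed */
	return 0;
}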
// SPDX-License-Identifier: GPL-2.0-only /* * TCP Westwood+: end-to-end bandwidth estimation for TCP * * Angelo Dell'Aera: author of the first version of TCP Westwood+ in Linux 2.4 * * Support at http://c3lab.poliba.it/index.php/Westwood * Main references in literature: * * - Mascolo S, Casetti, M. Gerla et al. * "TCP Westwood: bandwidth estimation for TCP" Proc. ACM Mobicom 2001 * * - A. Grieco, S. Mascolo * "Performance evaluation of New Reno, Vegas, Westwood+ TCP" ACM Computer * Comm. Review, 2004 * * - A. Dell'Aera, L. Grieco, S. Mascolo. * "Linux 2.4 Implementation of Westwood+ TCP with Rate-Halving : * A Performance Evaluation Over the Internet" (ICC 2004), Paris, June 2004 * * Westwood+ employs end-to-end bandwidth measurement to set cwnd and * ssthresh after packet loss. The probing phase is the same as in the * original Reno. */ #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/inet_diag.h> #include <net/tcp.h> /* TCP Westwood structure */ struct westwood { u32 bw_ns_est; /* first bandwidth estimation..not too smoothed 8) */ u32 bw_est; /* bandwidth estimate */ u32 rtt_win_sx; /* here starts a new evaluation... */ u32 bk; u32 snd_una; /* used for evaluating the number of acked bytes */ u32 cumul_ack; u32 accounted; u32 rtt; u32 rtt_min; /* minimum observed RTT */ u8 first_ack; /* flag indicating that this is the first ack */ u8 reset_rtt_min; /* Reset RTT min to next RTT sample*/ }; /* TCP Westwood functions and constants */ #define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ #define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ /* * @tcp_westwood_create * This function initializes the fields used in TCP Westwood+. It is * called after the initial SYN, so the sequence numbers are correct, * but for new passive connections we have no information about RTTmin * at this time, so we simply set it to TCP_WESTWOOD_INIT_RTT. This * value was deliberately chosen to be conservative, since that way we * can be sure it will be updated in a consistent way as soon as * possible. That will reasonably happen within the first * RTT period of the connection lifetime.
*/ static void tcp_westwood_init(struct sock *sk) { struct westwood *w = inet_csk_ca(sk); w->bk = 0; w->bw_ns_est = 0; w->bw_est = 0; w->accounted = 0; w->cumul_ack = 0; w->reset_rtt_min = 1; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_win_sx = tcp_jiffies32; w->snd_una = tcp_sk(sk)->snd_una; w->first_ack = 1; } /* * @westwood_do_filter * Low-pass filter. Implemented using constant coefficients. */ static inline u32 westwood_do_filter(u32 a, u32 b) { return ((7 * a) + b) >> 3; } static void westwood_filter(struct westwood *w, u32 delta) { /* If the filter is empty fill it with the first sample of bandwidth */ if (w->bw_ns_est == 0 && w->bw_est == 0) { w->bw_ns_est = w->bk / delta; w->bw_est = w->bw_ns_est; } else { w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta); w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est); } } /* * @westwood_pkts_acked * Called after processing a group of packets, but all Westwood needs * is the last sample of srtt. */ static void tcp_westwood_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct westwood *w = inet_csk_ca(sk); if (sample->rtt_us > 0) w->rtt = usecs_to_jiffies(sample->rtt_us); } /* * @westwood_update_window * Updates the RTT evaluation window if it is the right moment to do * so, and if so calls the filter to evaluate bandwidth. */ static void westwood_update_window(struct sock *sk) { struct westwood *w = inet_csk_ca(sk); s32 delta = tcp_jiffies32 - w->rtt_win_sx; /* Initialize w->snd_una with the first acked sequence number in order * to fix mismatch between tp->snd_una and w->snd_una for the first * bandwidth sample */ if (w->first_ack) { w->snd_una = tcp_sk(sk)->snd_una; w->first_ack = 0; } /* * See if an RTT window has passed. Note that if the RTT is less than * 50ms we don't filter; we keep 'building the sample', since * estimation over such small time intervals is better avoided. * On a LAN we will reasonably always have * right_bound = left_bound + WESTWOOD_RTT_MIN */ if (w->rtt && delta > max_t(u32, w->rtt, TCP_WESTWOOD_RTT_MIN)) { westwood_filter(w, delta); w->bk = 0; w->rtt_win_sx = tcp_jiffies32; } } static inline void update_rtt_min(struct westwood *w) { if (w->reset_rtt_min) { w->rtt_min = w->rtt; w->reset_rtt_min = 0; } else w->rtt_min = min(w->rtt, w->rtt_min); } /* * @westwood_fast_bw * Called when we are in the fast path, in particular when header * prediction is successful. In that case the update is straightforward * and doesn't need any particular care. */ static inline void westwood_fast_bw(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct westwood *w = inet_csk_ca(sk); westwood_update_window(sk); w->bk += tp->snd_una - w->snd_una; w->snd_una = tp->snd_una; update_rtt_min(w); } /* * @westwood_acked_count * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. */ static inline u32 westwood_acked_count(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct westwood *w = inet_csk_ca(sk); w->cumul_ack = tp->snd_una - w->snd_una; /* If cumul_ack is 0 this is a dupack since it's not moving * tp->snd_una.
*/ if (!w->cumul_ack) { w->accounted += tp->mss_cache; w->cumul_ack = tp->mss_cache; } if (w->cumul_ack > tp->mss_cache) { /* Partial or delayed ack */ if (w->accounted >= w->cumul_ack) { w->accounted -= w->cumul_ack; w->cumul_ack = tp->mss_cache; } else { w->cumul_ack -= w->accounted; w->accounted = 0; } } w->snd_una = tp->snd_una; return w->cumul_ack; } /* * TCP Westwood * Here the limit is evaluated as bandwidth estimate * RTTmin (we divide * by mss_cache to obtain it in packets). The result is clamped to >= 2, * so 0 is never returned. */ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct westwood *w = inet_csk_ca(sk); return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } static void tcp_westwood_ack(struct sock *sk, u32 ack_flags) { if (ack_flags & CA_ACK_SLOWPATH) { struct westwood *w = inet_csk_ca(sk); westwood_update_window(sk); w->bk += westwood_acked_count(sk); update_rtt_min(w); return; } westwood_fast_bw(sk); } static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) { struct tcp_sock *tp = tcp_sk(sk); struct westwood *w = inet_csk_ca(sk); switch (event) { case CA_EVENT_COMPLETE_CWR: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); tcp_snd_cwnd_set(tp, tp->snd_ssthresh); break; case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; break; default: /* don't care */ break; } } /* Extract info for TCP socket info provided via netlink. */ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct westwood *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = 1; info->vegas.tcpv_rttcnt = 0; info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt); info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); } return 0; } static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = tcp_westwood_event, .in_ack_event = tcp_westwood_ack, .get_info = tcp_westwood_info, .pkts_acked = tcp_westwood_pkts_acked, .owner = THIS_MODULE, .name = "westwood" }; static int __init tcp_westwood_register(void) { BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } static void __exit tcp_westwood_unregister(void) { tcp_unregister_congestion_control(&tcp_westwood); } module_init(tcp_westwood_register); module_exit(tcp_westwood_unregister); MODULE_AUTHOR("Stephen Hemminger, Angelo Dell'Aera"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Westwood+");
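westwood_do_filter() above is a constant-coefficient low-pass filter: new = (7*old + sample) / 8 in integer arithmetic, so each new sample contributes only 1/8 of its weight. A small standalone sketch (hypothetical bandwidth values in arbitrary units) showing how the estimate absorbs a jump only gradually:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as westwood_do_filter(): new = (7*a + b) >> 3 */
static uint32_t demo_filter(uint32_t a, uint32_t b)
{
	return ((7 * a) + b) >> 3;
}

int main(void)
{
	uint32_t est = 800;	/* filter seeded with the first sample */
	int i;

	/* feed five samples of a doubled bandwidth (hypothetical units) */
	for (i = 0; i < 5; i++) {
		est = demo_filter(est, 1600);
		printf("step %d: estimate = %u\n", i + 1, est);
	}
	/* prints 900, 987, 1063, 1130, 1188: the jump is smoothed out */
	return 0;
}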
/* * Equalizer Load-balancer for serial network interfaces. * * (c) Copyright 1995 Simon "Guru Aleph-Null" Janes * NCM: Network and Communications Management, Inc. * * (c) Copyright 2002 David S. Miller (davem@redhat.com) * * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * * The author may be reached as simon@ncm.com, or C/O * NCM * Attn: Simon Janes * 6803 Whittier Ave * McLean VA 22101 * Phone: 1-703-847-0040 ext 103 */ /* * Sources: * skeleton.c by Donald Becker. * Inspirations: * The Harried and Overworked Alan Cox * Conspiracies: * The Alan Cox and Mike McLagan plot to get someone else to do the code, * which turned out to be me. */ /* * $Log: eql.c,v $ * Revision 1.2 1996/04/11 17:51:52 guru * Added one-line eql_remove_slave patch. * * Revision 1.1 1996/04/11 17:44:17 guru * Initial revision * * Revision 3.13 1996/01/21 15:17:18 alan * tx_queue_len changes. * reformatted. * * Revision 3.12 1995/03/22 21:07:51 anarchy * Added capable() checks on configuration. * Moved header file.
* * Revision 3.11 1995/01/19 23:14:31 guru * slave_load = (ULONG_MAX - (ULONG_MAX / 2)) - * (priority_Bps) + bytes_queued * 8; * * Revision 3.10 1995/01/19 23:07:53 guru * back to * slave_load = (ULONG_MAX - (ULONG_MAX / 2)) - * (priority_Bps) + bytes_queued; * * Revision 3.9 1995/01/19 22:38:20 guru * slave_load = (ULONG_MAX - (ULONG_MAX / 2)) - * (priority_Bps) + bytes_queued * 4; * * Revision 3.8 1995/01/19 22:30:55 guru * slave_load = (ULONG_MAX - (ULONG_MAX / 2)) - * (priority_Bps) + bytes_queued * 2; * * Revision 3.7 1995/01/19 21:52:35 guru * printk's trimmed out. * * Revision 3.6 1995/01/19 21:49:56 guru * This is working pretty well. I gained 1 K/s in speed.. now it's just * robustness and printk's to be diked out. * * Revision 3.5 1995/01/18 22:29:59 guru * still crashes the kernel when the lock_wait thing is woken up. * * Revision 3.4 1995/01/18 21:59:47 guru * Broken set-bit locking snapshot * * Revision 3.3 1995/01/17 22:09:18 guru * infinite sleep in a lock somewhere.. * * Revision 3.2 1995/01/15 16:46:06 guru * Log trimmed of non-pertinent 1.x branch messages * * Revision 3.1 1995/01/15 14:41:45 guru * New Scheduler and timer stuff... * * Revision 1.15 1995/01/15 14:29:02 guru * Will make 1.14 (now 1.15) the 3.0 branch, and the 1.12 the 2.0 branch, the one * with the dumber scheduler * * Revision 1.14 1995/01/15 02:37:08 guru * shock.. the kept-new-versions could have zonked working * stuff.. shudder * * Revision 1.13 1995/01/15 02:36:31 guru * big changes * * scheduler was torn out and replaced with something smarter * * global names not prefixed with eql_ were renamed to protect * against namespace collisions * * a few more abstract interfaces were added to facilitate any * potential change of datastructure. the driver is still using * a linked list of slaves. going to a heap would be a bit of * an overkill. * * this compiles fine with no warnings. 
* * the locking mechanism and timer stuff must be written however, * this version will not work otherwise * * Sorry, I had to rewrite most of this for 2.5.x -DaveM */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/compat.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/netdevice.h> #include <net/net_namespace.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_eql.h> #include <linux/pkt_sched.h> #include <linux/uaccess.h> static int eql_open(struct net_device *dev); static int eql_close(struct net_device *dev); static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); static netdev_tx_t eql_slave_xmit(struct sk_buff *skb, struct net_device *dev); #define eql_is_slave(dev) ((dev->flags & IFF_SLAVE) == IFF_SLAVE) #define eql_is_master(dev) ((dev->flags & IFF_MASTER) == IFF_MASTER) static void eql_kill_one_slave(slave_queue_t *queue, slave_t *slave); static void eql_timer(struct timer_list *t) { equalizer_t *eql = timer_container_of(eql, t, timer); struct list_head *this, *tmp, *head; spin_lock(&eql->queue.lock); head = &eql->queue.all_slaves; list_for_each_safe(this, tmp, head) { slave_t *slave = list_entry(this, slave_t, list); if ((slave->dev->flags & IFF_UP) == IFF_UP) { slave->bytes_queued -= slave->priority_Bps; if (slave->bytes_queued < 0) slave->bytes_queued = 0; } else { eql_kill_one_slave(&eql->queue, slave); } } spin_unlock(&eql->queue.lock); eql->timer.expires = jiffies + EQL_DEFAULT_RESCHED_IVAL; add_timer(&eql->timer); } static const char version[] __initconst = "Equalizer2002: Simon Janes (simon@ncm.com) and David S. Miller (davem@redhat.com)"; static const struct net_device_ops eql_netdev_ops = { .ndo_open = eql_open, .ndo_stop = eql_close, .ndo_siocdevprivate = eql_siocdevprivate, .ndo_start_xmit = eql_slave_xmit, }; static void __init eql_setup(struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); timer_setup(&eql->timer, eql_timer, 0); eql->timer.expires = jiffies + EQL_DEFAULT_RESCHED_IVAL; spin_lock_init(&eql->queue.lock); INIT_LIST_HEAD(&eql->queue.all_slaves); eql->queue.master_dev = dev; dev->netdev_ops = &eql_netdev_ops; /* * Now we undo some of the things that eth_setup does * that we don't like */ dev->mtu = EQL_DEFAULT_MTU; /* set to 576 in if_eql.h */ dev->flags = IFF_MASTER; dev->type = ARPHRD_SLIP; dev->tx_queue_len = 5; /* Hands them off fast */ netif_keep_dst(dev); } static int eql_open(struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); /* XXX We should force this off automatically for the user. */ netdev_info(dev, "remember to turn off Van-Jacobson compression on your slave devices\n"); BUG_ON(!list_empty(&eql->queue.all_slaves)); eql->min_slaves = 1; eql->max_slaves = EQL_DEFAULT_MAX_SLAVES; /* 4 usually... 
*/ add_timer(&eql->timer); return 0; } static void eql_kill_one_slave(slave_queue_t *queue, slave_t *slave) { list_del(&slave->list); queue->num_slaves--; slave->dev->flags &= ~IFF_SLAVE; netdev_put(slave->dev, &slave->dev_tracker); kfree(slave); } static void eql_kill_slave_queue(slave_queue_t *queue) { struct list_head *head, *tmp, *this; spin_lock_bh(&queue->lock); head = &queue->all_slaves; list_for_each_safe(this, tmp, head) { slave_t *s = list_entry(this, slave_t, list); eql_kill_one_slave(queue, s); } spin_unlock_bh(&queue->lock); } static int eql_close(struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); /* * The timer has to be stopped first before we start hacking away * at the data structure it scans every so often... */ timer_delete_sync(&eql->timer); eql_kill_slave_queue(&eql->queue); return 0; } static int eql_enslave(struct net_device *dev, slaving_request_t __user *srq); static int eql_emancipate(struct net_device *dev, slaving_request_t __user *srq); static int eql_g_slave_cfg(struct net_device *dev, slave_config_t __user *sc); static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *sc); static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mc); static int eql_s_master_cfg(struct net_device *dev, master_config_t __user *mc); static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { if (cmd != EQL_GETMASTRCFG && cmd != EQL_GETSLAVECFG && !capable(CAP_NET_ADMIN)) return -EPERM; if (in_compat_syscall()) /* to be implemented */ return -EOPNOTSUPP; switch (cmd) { case EQL_ENSLAVE: return eql_enslave(dev, data); case EQL_EMANCIPATE: return eql_emancipate(dev, data); case EQL_GETSLAVECFG: return eql_g_slave_cfg(dev, data); case EQL_SETSLAVECFG: return eql_s_slave_cfg(dev, data); case EQL_GETMASTRCFG: return eql_g_master_cfg(dev, data); case EQL_SETMASTRCFG: return eql_s_master_cfg(dev, data); default: return -EOPNOTSUPP; } } /* queue->lock must be held */ static slave_t *__eql_schedule_slaves(slave_queue_t *queue) { unsigned long best_load = ~0UL; struct list_head *this, *tmp, *head; slave_t *best_slave; best_slave = NULL; /* Make a pass to set the best slave. */ head = &queue->all_slaves; list_for_each_safe(this, tmp, head) { slave_t *slave = list_entry(this, slave_t, list); unsigned long slave_load, bytes_queued, priority_Bps; /* Go through the slave list once, updating best_slave * whenever a new best_load is found. */ bytes_queued = slave->bytes_queued; priority_Bps = slave->priority_Bps; if ((slave->dev->flags & IFF_UP) == IFF_UP) { slave_load = (~0UL - (~0UL / 2)) - (priority_Bps) + bytes_queued * 8; if (slave_load < best_load) { best_load = slave_load; best_slave = slave; } } else { /* We found a dead slave, kill it. 
*/ eql_kill_one_slave(queue, slave); } } return best_slave; } static netdev_tx_t eql_slave_xmit(struct sk_buff *skb, struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); slave_t *slave; spin_lock(&eql->queue.lock); slave = __eql_schedule_slaves(&eql->queue); if (slave) { struct net_device *slave_dev = slave->dev; skb->dev = slave_dev; skb->priority = TC_PRIO_FILLER; slave->bytes_queued += skb->len; dev_queue_xmit(skb); dev->stats.tx_packets++; } else { dev->stats.tx_dropped++; dev_kfree_skb(skb); } spin_unlock(&eql->queue.lock); return NETDEV_TX_OK; } /* * Private ioctl functions */ /* queue->lock must be held */ static slave_t *__eql_find_slave_dev(slave_queue_t *queue, struct net_device *dev) { struct list_head *this, *head; head = &queue->all_slaves; list_for_each(this, head) { slave_t *slave = list_entry(this, slave_t, list); if (slave->dev == dev) return slave; } return NULL; } static inline int eql_is_full(slave_queue_t *queue) { equalizer_t *eql = netdev_priv(queue->master_dev); if (queue->num_slaves >= eql->max_slaves) return 1; return 0; } /* queue->lock must be held */ static int __eql_insert_slave(slave_queue_t *queue, slave_t *slave) { if (!eql_is_full(queue)) { slave_t *duplicate_slave = NULL; duplicate_slave = __eql_find_slave_dev(queue, slave->dev); if (duplicate_slave) eql_kill_one_slave(queue, duplicate_slave); netdev_hold(slave->dev, &slave->dev_tracker, GFP_ATOMIC); list_add(&slave->list, &queue->all_slaves); queue->num_slaves++; slave->dev->flags |= IFF_SLAVE; return 0; } return -ENOSPC; } static int eql_enslave(struct net_device *master_dev, slaving_request_t __user *srqp) { struct net_device *slave_dev; slaving_request_t srq; if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; slave_dev = __dev_get_by_name(&init_net, srq.slave_name); if (!slave_dev) return -ENODEV; if ((master_dev->flags & IFF_UP) == IFF_UP) { /* slave is not a master & not already a slave: */ if (!eql_is_master(slave_dev) && !eql_is_slave(slave_dev)) { slave_t *s = kzalloc(sizeof(*s), GFP_KERNEL); equalizer_t *eql = netdev_priv(master_dev); int ret; if (!s) return -ENOMEM; s->dev = slave_dev; s->priority = srq.priority; s->priority_bps = srq.priority; s->priority_Bps = srq.priority / 8; spin_lock_bh(&eql->queue.lock); ret = __eql_insert_slave(&eql->queue, s); if (ret) kfree(s); spin_unlock_bh(&eql->queue.lock); return ret; } } return -EINVAL; } static int eql_emancipate(struct net_device *master_dev, slaving_request_t __user *srqp) { equalizer_t *eql = netdev_priv(master_dev); struct net_device *slave_dev; slaving_request_t srq; int ret; if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; slave_dev = __dev_get_by_name(&init_net, srq.slave_name); if (!slave_dev) return -ENODEV; ret = -EINVAL; spin_lock_bh(&eql->queue.lock); if (eql_is_slave(slave_dev)) { slave_t *slave = __eql_find_slave_dev(&eql->queue, slave_dev); if (slave) { eql_kill_one_slave(&eql->queue, slave); ret = 0; } } spin_unlock_bh(&eql->queue.lock); return ret; } static int eql_g_slave_cfg(struct net_device *dev, slave_config_t __user *scp) { equalizer_t *eql = netdev_priv(dev); slave_t *slave; struct net_device *slave_dev; slave_config_t sc; int ret; if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; slave_dev = __dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; ret = -EINVAL; spin_lock_bh(&eql->queue.lock); if (eql_is_slave(slave_dev)) { slave = __eql_find_slave_dev(&eql->queue, slave_dev); if (slave) { sc.priority = 
slave->priority; ret = 0; } } spin_unlock_bh(&eql->queue.lock); if (!ret && copy_to_user(scp, &sc, sizeof (slave_config_t))) ret = -EFAULT; return ret; } static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *scp) { slave_t *slave; equalizer_t *eql; struct net_device *slave_dev; slave_config_t sc; int ret; if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; slave_dev = __dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; ret = -EINVAL; eql = netdev_priv(dev); spin_lock_bh(&eql->queue.lock); if (eql_is_slave(slave_dev)) { slave = __eql_find_slave_dev(&eql->queue, slave_dev); if (slave) { slave->priority = sc.priority; slave->priority_bps = sc.priority; slave->priority_Bps = sc.priority / 8; ret = 0; } } spin_unlock_bh(&eql->queue.lock); return ret; } static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mcp) { equalizer_t *eql; master_config_t mc; memset(&mc, 0, sizeof(master_config_t)); if (eql_is_master(dev)) { eql = netdev_priv(dev); mc.max_slaves = eql->max_slaves; mc.min_slaves = eql->min_slaves; if (copy_to_user(mcp, &mc, sizeof (master_config_t))) return -EFAULT; return 0; } return -EINVAL; } static int eql_s_master_cfg(struct net_device *dev, master_config_t __user *mcp) { equalizer_t *eql; master_config_t mc; if (copy_from_user(&mc, mcp, sizeof (master_config_t))) return -EFAULT; if (eql_is_master(dev)) { eql = netdev_priv(dev); eql->max_slaves = mc.max_slaves; eql->min_slaves = mc.min_slaves; return 0; } return -EINVAL; } static struct net_device *dev_eql; static int __init eql_init_module(void) { int err; pr_info("%s\n", version); dev_eql = alloc_netdev(sizeof(equalizer_t), "eql", NET_NAME_UNKNOWN, eql_setup); if (!dev_eql) return -ENOMEM; err = register_netdev(dev_eql); if (err) free_netdev(dev_eql); return err; } static void __exit eql_cleanup_module(void) { unregister_netdev(dev_eql); free_netdev(dev_eql); } module_init(eql_init_module); module_exit(eql_cleanup_module); MODULE_DESCRIPTION("Equalizer Load-balancer for serial network interfaces"); MODULE_LICENSE("GPL");
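The scheduler in __eql_schedule_slaves() above picks the slave with the smallest slave_load = (~0UL - ~0UL/2) - priority_Bps + bytes_queued * 8: a large fixed bias, minus the configured bandwidth in bytes per second, plus an 8x penalty on queued bytes, so backlog quickly outweighs raw link speed. A standalone sketch with hypothetical modem-style numbers (a busy 28.8k link loses to an idle 14.4k one):

#include <stdio.h>

/* Same metric as __eql_schedule_slaves(): smaller is better. */
static unsigned long demo_load(unsigned long priority_Bps,
			       unsigned long bytes_queued)
{
	return (~0UL - (~0UL / 2)) - priority_Bps + bytes_queued * 8;
}

int main(void)
{
	/* hypothetical slaves: a 28.8kbps modem with 2000 bytes queued
	 * vs. an idle 14.4kbps modem (priorities are bytes per second) */
	unsigned long fast_busy = demo_load(28800 / 8, 2000);
	unsigned long slow_idle = demo_load(14400 / 8, 0);

	/* prints "slow+idle": the 8x queue penalty dominates */
	printf("winner: %s\n",
	       fast_busy < slow_idle ? "fast+busy" : "slow+idle");
	return 0;
}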
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X_TABLES_H #define _X_TABLES_H #include <linux/netdevice.h> #include <linux/static_key.h> #include <linux/netfilter.h> #include <uapi/linux/netfilter/x_tables.h> /* Test a struct->invflags and a boolean for inequality */ #define NF_INVF(ptr, flag, boolean) \ ((boolean) ^ !!((ptr)->invflags & (flag))) /** * struct xt_action_param - parameters for matches/targets * * @match: the match extension * @target: the target extension * @matchinfo: per-match data * @targinfo: per-target data * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems */ struct xt_action_param { union { const struct xt_match *match; const struct xt_target *target; }; union { const void *matchinfo, *targinfo; }; const struct nf_hook_state *state; unsigned int thoff; u16 fragoff; bool hotdrop; }; static inline struct net *xt_net(const struct xt_action_param *par) { return par->state->net; } static inline struct net_device *xt_in(const struct xt_action_param *par) { return par->state->in; } static inline struct net_device *xt_out(const struct xt_action_param *par) { return par->state->out; } static inline unsigned int xt_hooknum(const struct xt_action_param *par) { return par->state->hook; } static inline u_int8_t xt_family(const struct xt_action_param *par) { return par->state->pf; } /** * struct
xt_mtchk_param - parameters for match extensions' * checkentry functions * * @net: network namespace through which the check was invoked * @table: table into which the rule is being inserted * @entryinfo: the family-specific rule data * (struct ipt_ip, ip6t_ip, arpt_arp or (note) ebt_entry) * @match: struct xt_match through which this function was invoked * @matchinfo: per-match data * @hook_mask: via which hooks the new rule is reachable * Other fields as above. */ struct xt_mtchk_param { struct net *net; const char *table; const void *entryinfo; const struct xt_match *match; void *matchinfo; unsigned int hook_mask; u_int8_t family; bool nft_compat; }; /** * struct xt_mtdtor_param - match destructor parameters * Fields as above. */ struct xt_mtdtor_param { struct net *net; const struct xt_match *match; void *matchinfo; u_int8_t family; }; /** * struct xt_tgchk_param - parameters for target extensions' * checkentry functions * * @entryinfo: the family-specific rule data * (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry) * * Other fields as above. */ struct xt_tgchk_param { struct net *net; const char *table; const void *entryinfo; const struct xt_target *target; void *targinfo; unsigned int hook_mask; u_int8_t family; bool nft_compat; }; /* Target destructor parameters */ struct xt_tgdtor_param { struct net *net; const struct xt_target *target; void *targinfo; u_int8_t family; }; struct xt_match { struct list_head list; const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ /* Arguments changed since 2.6.9, as this must now handle non-linear skbs, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, struct xt_action_param *); /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); /* Called when an entry of this type is deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int matchsize; unsigned int usersize; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; }; /* Registration hooks for targets. */ struct xt_target { struct list_head list; const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, const struct xt_action_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); /* Called when an entry of this type is deleted.
*/ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* Called when userspace alignment differs from the kernel-space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int targetsize; unsigned int usersize; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; }; /* Furniture shopping... */ struct xt_table { struct list_head list; /* What hooks you will enter on */ unsigned int valid_hooks; /* Man behind the curtain... */ struct xt_table_info *private; /* hook ops that register the table with the netfilter core */ struct nf_hook_ops *ops; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; u_int8_t af; /* address/protocol family */ int priority; /* hook order */ /* A unique name... */ const char name[XT_TABLE_MAXNAMELEN]; }; #include <linux/netfilter_ipv4.h> /* The table itself */ struct xt_table_info { /* Size per table */ unsigned int size; /* Number of entries: FIXME. --RR */ unsigned int number; /* Initial number of entries. Needed for module usage count */ unsigned int initial_entries; /* Entry points and underflows */ unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS]; /* * Number of user chains. Since tables cannot have loops, at most * @stacksize jumps (number of user chains) can possibly be made. */ unsigned int stacksize; void ***jumpstack; unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); void xt_unregister_targets(struct xt_target *target, unsigned int n); int xt_register_match(struct xt_match *target); void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks); unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); int xt_check_proc_name(const char *name, unsigned int size); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u); int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, int usersize, int size, int aligned_size); void *xt_copy_counters(sockptr_t arg, unsigned int len, struct xt_counters_info *info); struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); void *xt_unregister_table(struct xt_table *table); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error); struct xt_match *xt_find_match(u8 af, const 
char *name, u8 revision); struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err); struct xt_table *xt_find_table(struct net *net, u8 af, const char *name); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, const char *name); void xt_table_unlock(struct xt_table *t); int xt_proto_init(struct net *net, u_int8_t af); void xt_proto_fini(struct net *net, u_int8_t af); struct xt_table_info *xt_alloc_table_info(unsigned int size); void xt_free_table_info(struct xt_table_info *info); /** * xt_recseq - recursive seqcount for netfilter use * * Packet processing changes the seqcount only if no recursion happened; * get_counters() can use read_seqcount_begin()/read_seqcount_retry(), * because we use the normal seqcount convention: * Low order bit set to 1 if a writer is active. */ DECLARE_PER_CPU(seqcount_t, xt_recseq); /* xt_tee_enabled - true if x_tables needs to handle reentrancy * * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. */ extern struct static_key xt_tee_enabled; /** * xt_write_recseq_begin - start of a write section * * Begin packet processing: all readers must wait for the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) * Returns: * 1 if no recursion on this cpu * 0 if recursion detected */ static inline unsigned int xt_write_recseq_begin(void) { unsigned int addend; /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). */ addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1; /* * This is kind of a write_seqcount_begin(), but addend is 0 or 1 * We don't check the addend value to avoid a test and conditional jump, * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); smp_mb(); return addend; } /** * xt_write_recseq_end - end of a write section * @addend: return value from previous xt_write_recseq_begin() * * End packet processing: all readers can proceed * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) */ static inline void xt_write_recseq_end(unsigned int addend) { /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); } /* * This helper is performance critical and must be inlined */ static inline unsigned long ifname_compare_aligned(const char *_a, const char *_b, const char *_mask) { const unsigned long *a = (const unsigned long *)_a; const unsigned long *b = (const unsigned long *)_b; const unsigned long *mask = (const unsigned long *)_mask; unsigned long ret; ret = (a[0] ^ b[0]) & mask[0]; if (IFNAMSIZ > sizeof(unsigned long)) ret |= (a[1] ^ b[1]) & mask[1]; if (IFNAMSIZ > 2 * sizeof(unsigned long)) ret |= (a[2] ^ b[2]) & mask[2]; if (IFNAMSIZ > 3 * sizeof(unsigned long)) ret |= (a[3] ^ b[3]) & mask[3]; BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); return ret; } struct xt_percpu_counter_alloc_state { unsigned int off; const char __percpu *mem; }; bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, struct xt_counters *counter); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) return 
this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); int xt_register_template(const struct xt_table *t, int(*table_init)(struct net *net)); void xt_unregister_template(const struct xt_table *t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> struct compat_xt_entry_match { union { struct { u_int16_t match_size; char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; struct { u_int16_t match_size; compat_uptr_t match; } kernel; u_int16_t match_size; } u; unsigned char data[]; }; struct compat_xt_entry_target { union { struct { u_int16_t target_size; char name[XT_FUNCTION_MAXNAMELEN - 1]; u_int8_t revision; } user; struct { u_int16_t target_size; compat_uptr_t target; } kernel; u_int16_t target_size; } u; unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks * need to change whole approach in order to calculate align as function of * current task alignment */ struct compat_xt_counters { compat_u64 pcnt, bcnt; /* Packet and byte counters */ }; struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; struct compat_xt_counters counters[]; }; struct _compat_xt_align { __u8 u8; __u16 u16; __u32 u32; compat_u64 u64; }; #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align)) void xt_compat_lock(u_int8_t af); void xt_compat_unlock(u_int8_t af); int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); void xt_compat_flush_offsets(u_int8_t af); int xt_compat_init_offsets(u8 af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); int xt_compat_target_offset(const struct xt_target *target); void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); #endif /* CONFIG_NETFILTER_XTABLES_COMPAT */ #endif /* _X_TABLES_H */
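/*
 * Illustrative sketch (not part of the original header): the intended
 * pairing of xt_write_recseq_begin()/xt_write_recseq_end() around a
 * ruleset walk, following the rules documented above. The traversal
 * body is a placeholder; only the seqcount discipline is shown.
 */
static inline void xt_recseq_usage_sketch(void)
{
	unsigned int addend;

	local_bh_disable();	/* softirqs and preemption off, as required above */
	addend = xt_write_recseq_begin();

	/* ... walk the table and update per-cpu counters here ... */

	xt_write_recseq_end(addend);
	local_bh_enable();
}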
// SPDX-License-Identifier: GPL-2.0+ /* * comedi_buf.c * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1997-2000 David A. 
Schleef <ds@schleef.org> * Copyright (C) 2002 Frank Mori Hess <fmhess@users.sourceforge.net> */ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/comedi/comedidev.h> #include "comedi_internal.h" #ifdef PAGE_KERNEL_NOCACHE #define COMEDI_PAGE_PROTECTION PAGE_KERNEL_NOCACHE #else #define COMEDI_PAGE_PROTECTION PAGE_KERNEL #endif static void comedi_buf_map_kref_release(struct kref *kref) { struct comedi_buf_map *bm = container_of(kref, struct comedi_buf_map, refcount); struct comedi_buf_page *buf; unsigned int i; if (bm->page_list) { if (bm->dma_dir != DMA_NONE) { for (i = 0; i < bm->n_pages; i++) { buf = &bm->page_list[i]; dma_free_coherent(bm->dma_hw_dev, PAGE_SIZE, buf->virt_addr, buf->dma_addr); } } else { for (i = 0; i < bm->n_pages; i++) { buf = &bm->page_list[i]; ClearPageReserved(virt_to_page(buf->virt_addr)); free_page((unsigned long)buf->virt_addr); } } vfree(bm->page_list); } if (bm->dma_dir != DMA_NONE) put_device(bm->dma_hw_dev); kfree(bm); } static void __comedi_buf_free(struct comedi_device *dev, struct comedi_subdevice *s) { struct comedi_async *async = s->async; struct comedi_buf_map *bm; unsigned long flags; async->prealloc_bufsz = 0; spin_lock_irqsave(&s->spin_lock, flags); bm = async->buf_map; async->buf_map = NULL; spin_unlock_irqrestore(&s->spin_lock, flags); comedi_buf_map_put(bm); } static struct comedi_buf_map * comedi_buf_map_alloc(struct comedi_device *dev, enum dma_data_direction dma_dir, unsigned int n_pages) { struct comedi_buf_map *bm; struct comedi_buf_page *buf; unsigned int i; bm = kzalloc(sizeof(*bm), GFP_KERNEL); if (!bm) return NULL; kref_init(&bm->refcount); bm->dma_dir = dma_dir; if (bm->dma_dir != DMA_NONE) { /* Need ref to hardware device to free buffer later. */ bm->dma_hw_dev = get_device(dev->hw_dev); } bm->page_list = vzalloc(sizeof(*buf) * n_pages); if (!bm->page_list) goto err; if (bm->dma_dir != DMA_NONE) { for (i = 0; i < n_pages; i++) { buf = &bm->page_list[i]; buf->virt_addr = dma_alloc_coherent(bm->dma_hw_dev, PAGE_SIZE, &buf->dma_addr, GFP_KERNEL); if (!buf->virt_addr) break; } } else { for (i = 0; i < n_pages; i++) { buf = &bm->page_list[i]; buf->virt_addr = (void *)get_zeroed_page(GFP_KERNEL); if (!buf->virt_addr) break; SetPageReserved(virt_to_page(buf->virt_addr)); } } bm->n_pages = i; if (i < n_pages) goto err; return bm; err: comedi_buf_map_put(bm); return NULL; } static void __comedi_buf_alloc(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int n_pages) { struct comedi_async *async = s->async; struct comedi_buf_map *bm; unsigned long flags; if (!IS_ENABLED(CONFIG_HAS_DMA) && s->async_dma_dir != DMA_NONE) { dev_err(dev->class_dev, "dma buffer allocation not supported\n"); return; } bm = comedi_buf_map_alloc(dev, s->async_dma_dir, n_pages); if (!bm) return; spin_lock_irqsave(&s->spin_lock, flags); async->buf_map = bm; spin_unlock_irqrestore(&s->spin_lock, flags); async->prealloc_bufsz = n_pages << PAGE_SHIFT; } void comedi_buf_map_get(struct comedi_buf_map *bm) { if (bm) kref_get(&bm->refcount); } int comedi_buf_map_put(struct comedi_buf_map *bm) { if (bm) return kref_put(&bm->refcount, comedi_buf_map_kref_release); return 1; } /* helper for "access" vm operation */ int comedi_buf_map_access(struct comedi_buf_map *bm, unsigned long offset, void *buf, int len, int write) { unsigned int pgoff = offset_in_page(offset); unsigned long pg = offset >> PAGE_SHIFT; int done = 0; while (done < len && pg < bm->n_pages) { int l = min_t(int, len - done, PAGE_SIZE - pgoff); void *b = bm->page_list[pg].virt_addr + 
pgoff; if (write) memcpy(b, buf, l); else memcpy(buf, b, l); buf += l; done += l; pg++; pgoff = 0; } return done; } /* returns s->async->buf_map and increments its kref refcount */ struct comedi_buf_map * comedi_buf_map_from_subdev_get(struct comedi_subdevice *s) { struct comedi_async *async = s->async; struct comedi_buf_map *bm = NULL; unsigned long flags; if (!async) return NULL; spin_lock_irqsave(&s->spin_lock, flags); bm = async->buf_map; /* only want it if buffer pages allocated */ if (bm && bm->n_pages) comedi_buf_map_get(bm); else bm = NULL; spin_unlock_irqrestore(&s->spin_lock, flags); return bm; } bool comedi_buf_is_mmapped(struct comedi_subdevice *s) { struct comedi_buf_map *bm = s->async->buf_map; return bm && (kref_read(&bm->refcount) > 1); } int comedi_buf_alloc(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long new_size) { struct comedi_async *async = s->async; lockdep_assert_held(&dev->mutex); /* Round up new_size to multiple of PAGE_SIZE */ new_size = (new_size + PAGE_SIZE - 1) & PAGE_MASK; /* if no change is required, do nothing */ if (async->prealloc_bufsz == new_size) return 0; /* deallocate old buffer */ __comedi_buf_free(dev, s); /* allocate new buffer */ if (new_size) { unsigned int n_pages = new_size >> PAGE_SHIFT; __comedi_buf_alloc(dev, s, n_pages); if (!async->prealloc_bufsz) return -ENOMEM; } return 0; } void comedi_buf_reset(struct comedi_subdevice *s) { struct comedi_async *async = s->async; async->buf_write_alloc_count = 0; async->buf_write_count = 0; async->buf_read_alloc_count = 0; async->buf_read_count = 0; async->buf_write_ptr = 0; async->buf_read_ptr = 0; async->cur_chan = 0; async->scans_done = 0; async->scan_progress = 0; async->munge_chan = 0; async->munge_count = 0; async->munge_ptr = 0; async->events = 0; } static unsigned int comedi_buf_write_n_unalloc(struct comedi_subdevice *s) { struct comedi_async *async = s->async; unsigned int free_end = async->buf_read_count + async->prealloc_bufsz; return free_end - async->buf_write_alloc_count; } unsigned int comedi_buf_write_n_available(struct comedi_subdevice *s) { struct comedi_async *async = s->async; unsigned int free_end = async->buf_read_count + async->prealloc_bufsz; return free_end - async->buf_write_count; } /** * comedi_buf_write_alloc() - Reserve buffer space for writing * @s: COMEDI subdevice. * @nbytes: Maximum space to reserve in bytes. * * Reserve up to @nbytes bytes of space to be written in the COMEDI acquisition * data buffer associated with the subdevice. The amount reserved is limited * by the space available. * * Return: The amount of space reserved in bytes. 
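 *
 * A minimal usage sketch (illustrative only, not from the original
 * file): a writer with @nbytes bytes of samples ready would typically do
 *
 *	nbytes = comedi_buf_write_alloc(s, nbytes);
 *	... copy nbytes bytes into the buffer pages at the write position ...
 *	comedi_buf_write_free(s, nbytes);
 *
 * so that only the space actually granted is filled and freed.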
*/ unsigned int comedi_buf_write_alloc(struct comedi_subdevice *s, unsigned int nbytes) { struct comedi_async *async = s->async; unsigned int unalloc = comedi_buf_write_n_unalloc(s); if (nbytes > unalloc) nbytes = unalloc; async->buf_write_alloc_count += nbytes; /* * ensure the async buffer 'counts' are read and updated * before we write data to the write-alloc'ed buffer space */ smp_mb(); return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_write_alloc); /* * munging is applied to data by the core as it passes between user * and kernel space */ static unsigned int comedi_buf_munge(struct comedi_subdevice *s, unsigned int num_bytes) { struct comedi_async *async = s->async; struct comedi_buf_page *buf_page_list = async->buf_map->page_list; unsigned int count = 0; const unsigned int num_sample_bytes = comedi_bytes_per_sample(s); if (!s->munge || (async->cmd.flags & CMDF_RAWDATA) || async->cmd.chanlist_len == 0) { async->munge_count += num_bytes; return num_bytes; } /* don't munge partial samples */ num_bytes -= num_bytes % num_sample_bytes; while (count < num_bytes) { /* * Do not munge beyond page boundary. * Note: prealloc_bufsz is a multiple of PAGE_SIZE. */ unsigned int page = async->munge_ptr >> PAGE_SHIFT; unsigned int offset = offset_in_page(async->munge_ptr); unsigned int block_size = min(num_bytes - count, PAGE_SIZE - offset); s->munge(s->device, s, buf_page_list[page].virt_addr + offset, block_size, async->munge_chan); /* * ensure data is munged in buffer before the * async buffer munge_count is incremented */ smp_wmb(); async->munge_chan += block_size / num_sample_bytes; async->munge_chan %= async->cmd.chanlist_len; async->munge_count += block_size; async->munge_ptr += block_size; if (async->munge_ptr == async->prealloc_bufsz) async->munge_ptr = 0; count += block_size; } return count; } unsigned int comedi_buf_write_n_allocated(struct comedi_subdevice *s) { struct comedi_async *async = s->async; return async->buf_write_alloc_count - async->buf_write_count; } /** * comedi_buf_write_free() - Free buffer space after it is written * @s: COMEDI subdevice. * @nbytes: Maximum space to free in bytes. * * Free up to @nbytes bytes of space previously reserved for writing in the * COMEDI acquisition data buffer associated with the subdevice. The amount of * space freed is limited to the amount that was reserved. The freed space is * assumed to have been filled with sample data by the writer. * * If the samples in the freed space need to be "munged", do so here. The * freed space becomes available for allocation by the reader. * * Return: The amount of space freed in bytes. */ unsigned int comedi_buf_write_free(struct comedi_subdevice *s, unsigned int nbytes) { struct comedi_async *async = s->async; unsigned int allocated = comedi_buf_write_n_allocated(s); if (nbytes > allocated) nbytes = allocated; async->buf_write_count += nbytes; async->buf_write_ptr += nbytes; comedi_buf_munge(s, async->buf_write_count - async->munge_count); if (async->buf_write_ptr >= async->prealloc_bufsz) async->buf_write_ptr %= async->prealloc_bufsz; return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_write_free); /** * comedi_buf_read_n_available() - Determine amount of readable buffer space * @s: COMEDI subdevice. * * Determine the amount of readable buffer space in the COMEDI acquisition data * buffer associated with the subdevice. The readable buffer space is that * which has been freed by the writer and "munged" to the sample data format * expected by COMEDI if necessary. * * Return: The amount of readable buffer space. 
*/ unsigned int comedi_buf_read_n_available(struct comedi_subdevice *s) { struct comedi_async *async = s->async; unsigned int num_bytes; if (!async) return 0; num_bytes = async->munge_count - async->buf_read_count; /* * ensure the async buffer 'counts' are read before we * attempt to read data from the buffer */ smp_rmb(); return num_bytes; } EXPORT_SYMBOL_GPL(comedi_buf_read_n_available); /** * comedi_buf_read_alloc() - Reserve buffer space for reading * @s: COMEDI subdevice. * @nbytes: Maximum space to reserve in bytes. * * Reserve up to @nbytes bytes of previously written and "munged" buffer space * for reading in the COMEDI acquisition data buffer associated with the * subdevice. The amount reserved is limited to the space available. The * reader can read from the reserved space and then free it. A reader is also * allowed to read from the space before reserving it as long as it determines * the amount of readable data available, but the space needs to be marked as * reserved before it can be freed. * * Return: The amount of space reserved in bytes. */ unsigned int comedi_buf_read_alloc(struct comedi_subdevice *s, unsigned int nbytes) { struct comedi_async *async = s->async; unsigned int available; available = async->munge_count - async->buf_read_alloc_count; if (nbytes > available) nbytes = available; async->buf_read_alloc_count += nbytes; /* * ensure the async buffer 'counts' are read before we * attempt to read data from the read-alloc'ed buffer space */ smp_rmb(); return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_read_alloc); static unsigned int comedi_buf_read_n_allocated(struct comedi_async *async) { return async->buf_read_alloc_count - async->buf_read_count; } /** * comedi_buf_read_free() - Free buffer space after it has been read * @s: COMEDI subdevice. * @nbytes: Maximum space to free in bytes. * * Free up to @nbytes bytes of buffer space previously reserved for reading in * the COMEDI acquisition data buffer associated with the subdevice. The * amount of space freed is limited to the amount that was reserved. * * The freed space becomes available for allocation by the writer. * * Return: The amount of space freed in bytes. */ unsigned int comedi_buf_read_free(struct comedi_subdevice *s, unsigned int nbytes) { struct comedi_async *async = s->async; unsigned int allocated; /* * ensure data has been read out of buffer before * the async read count is incremented */ smp_mb(); allocated = comedi_buf_read_n_allocated(async); if (nbytes > allocated) nbytes = allocated; async->buf_read_count += nbytes; async->buf_read_ptr += nbytes; async->buf_read_ptr %= async->prealloc_bufsz; return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_read_free); static void comedi_buf_memcpy_to(struct comedi_subdevice *s, const void *data, unsigned int num_bytes) { struct comedi_async *async = s->async; struct comedi_buf_page *buf_page_list = async->buf_map->page_list; unsigned int write_ptr = async->buf_write_ptr; while (num_bytes) { /* * Do not copy beyond page boundary. * Note: prealloc_bufsz is a multiple of PAGE_SIZE. 
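 * For example, with 4 KiB pages a write_ptr of 0x1830 lands in
 * page 1 at offset 0x830, so at most 0x7d0 bytes are copied
 * before the loop advances to the next page.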
*/ unsigned int page = write_ptr >> PAGE_SHIFT; unsigned int offset = offset_in_page(write_ptr); unsigned int block_size = min(num_bytes, PAGE_SIZE - offset); memcpy(buf_page_list[page].virt_addr + offset, data, block_size); data += block_size; num_bytes -= block_size; write_ptr += block_size; if (write_ptr == async->prealloc_bufsz) write_ptr = 0; } } static void comedi_buf_memcpy_from(struct comedi_subdevice *s, void *dest, unsigned int nbytes) { struct comedi_async *async = s->async; struct comedi_buf_page *buf_page_list = async->buf_map->page_list; unsigned int read_ptr = async->buf_read_ptr; while (nbytes) { /* * Do not copy beyond page boundary. * Note: prealloc_bufsz is a multiple of PAGE_SIZE. */ unsigned int page = read_ptr >> PAGE_SHIFT; unsigned int offset = offset_in_page(read_ptr); unsigned int block_size = min(nbytes, PAGE_SIZE - offset); memcpy(dest, buf_page_list[page].virt_addr + offset, block_size); nbytes -= block_size; dest += block_size; read_ptr += block_size; if (read_ptr == async->prealloc_bufsz) read_ptr = 0; } } /** * comedi_buf_write_samples() - Write sample data to COMEDI buffer * @s: COMEDI subdevice. * @data: Pointer to source samples. * @nsamples: Number of samples to write. * * Write up to @nsamples samples to the COMEDI acquisition data buffer * associated with the subdevice, mark it as written and update the * acquisition scan progress. If there is not enough room for the specified * number of samples, the number of samples written is limited to the number * that will fit and the %COMEDI_CB_OVERFLOW event flag is set to cause the * acquisition to terminate with an overrun error. Set the %COMEDI_CB_BLOCK * event flag if any samples are written to cause waiting tasks to be woken * when the event flags are processed. * * Return: The amount of data written in bytes. */ unsigned int comedi_buf_write_samples(struct comedi_subdevice *s, const void *data, unsigned int nsamples) { unsigned int max_samples; unsigned int nbytes; /* * Make sure there is enough room in the buffer for all the samples. * If not, clamp the nsamples to the number that will fit, flag the * buffer overrun and add the samples that fit. */ max_samples = comedi_bytes_to_samples(s, comedi_buf_write_n_unalloc(s)); if (nsamples > max_samples) { dev_warn(s->device->class_dev, "buffer overrun\n"); s->async->events |= COMEDI_CB_OVERFLOW; nsamples = max_samples; } if (nsamples == 0) return 0; nbytes = comedi_buf_write_alloc(s, comedi_samples_to_bytes(s, nsamples)); comedi_buf_memcpy_to(s, data, nbytes); comedi_buf_write_free(s, nbytes); comedi_inc_scan_progress(s, nbytes); s->async->events |= COMEDI_CB_BLOCK; return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_write_samples); /** * comedi_buf_read_samples() - Read sample data from COMEDI buffer * @s: COMEDI subdevice. * @data: Pointer to destination. * @nsamples: Maximum number of samples to read. * * Read up to @nsamples samples from the COMEDI acquisition data buffer * associated with the subdevice, mark it as read and update the acquisition * scan progress. Limit the number of samples read to the number available. * Set the %COMEDI_CB_BLOCK event flag if any samples are read to cause waiting * tasks to be woken when the event flags are processed. * * Return: The amount of data read in bytes. 
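 *
 * Illustrative call pattern (the destination buffer @dest and sample
 * count @ns here are assumptions, not from the original file):
 *
 *	nbytes = comedi_buf_read_samples(s, dest, ns);
 *
 * Note that the return value counts bytes, not samples.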
*/ unsigned int comedi_buf_read_samples(struct comedi_subdevice *s, void *data, unsigned int nsamples) { unsigned int max_samples; unsigned int nbytes; /* clamp nsamples to the number of full samples available */ max_samples = comedi_bytes_to_samples(s, comedi_buf_read_n_available(s)); if (nsamples > max_samples) nsamples = max_samples; if (nsamples == 0) return 0; nbytes = comedi_buf_read_alloc(s, comedi_samples_to_bytes(s, nsamples)); comedi_buf_memcpy_from(s, data, nbytes); comedi_buf_read_free(s, nbytes); comedi_inc_scan_progress(s, nbytes); s->async->events |= COMEDI_CB_BLOCK; return nbytes; } EXPORT_SYMBOL_GPL(comedi_buf_read_samples);
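/*
 * Illustrative sketch (not part of the original file): a driver
 * interrupt handler handing acquired samples to the core with
 * comedi_buf_write_samples(). read_hw_fifo() is a hypothetical helper
 * standing in for real hardware access; comedi_handle_events() is the
 * core's event processing entry point.
 */
static bool read_hw_fifo(struct comedi_device *dev, unsigned short *sample);

static void example_dev_interrupt(struct comedi_device *dev,
				  struct comedi_subdevice *s)
{
	unsigned short sample;

	/* Drain the (hypothetical) hardware FIFO one sample at a time. */
	while (read_hw_fifo(dev, &sample))
		comedi_buf_write_samples(s, &sample, 1);

	/* Let the core act on any COMEDI_CB_* events set above. */
	comedi_handle_events(dev, s);
}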
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright 2007 Hewlett-Packard Development Company, L.P. * * This file is part of the SCTP kernel implementation * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Vlad Yasevich <vladislav.yasevich@hp.com> */ #include <crypto/sha1.h> #include <crypto/sha2.h> #include <linux/slab.h> #include <linux/types.h> #include <net/sctp/sctp.h> #include <net/sctp/auth.h> static const struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { { /* id 0 is reserved, so the entry is left all zero */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_0, }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, .hmac_len = SHA1_DIGEST_SIZE, }, { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, .hmac_len = SHA256_DIGEST_SIZE, } }; static bool sctp_hmac_supported(__u16 hmac_id) { return hmac_id < ARRAY_SIZE(sctp_hmac_list) && sctp_hmac_list[hmac_id].hmac_len != 0; } void sctp_auth_key_put(struct sctp_auth_bytes *key) { if (!key) return; if (refcount_dec_and_test(&key->refcnt)) { kfree_sensitive(key); SCTP_DBG_OBJCNT_DEC(keys); } } /* Create a new key structure of a given length */ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) { struct sctp_auth_bytes *key; /* Verify that we are not going to overflow INT_MAX */ if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes))) return NULL; /* Allocate the shared key */ key = kmalloc(sizeof(struct sctp_auth_bytes) + key_len, gfp); if (!key) return NULL; key->len = key_len; refcount_set(&key->refcnt, 1); SCTP_DBG_OBJCNT_INC(keys); return key; } /* Create a new shared key container with a given key id */ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) { struct sctp_shared_key *new; /* Allocate the shared key container */ new = kzalloc(sizeof(struct sctp_shared_key), gfp); if (!new) return NULL; INIT_LIST_HEAD(&new->key_list); refcount_set(&new->refcnt, 1); new->key_id = key_id; return new; } /* Free the shared key structure */ static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); sctp_auth_key_put(sh_key->key); sh_key->key = NULL; kfree(sh_key); } void sctp_auth_shkey_release(struct sctp_shared_key *sh_key) { if (refcount_dec_and_test(&sh_key->refcnt)) sctp_auth_shkey_destroy(sh_key); } void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key) { refcount_inc(&sh_key->refcnt); } /* Destroy the entire key list. This is done during the * association and endpoint free process. */ void sctp_auth_destroy_keys(struct list_head *keys) { struct sctp_shared_key *ep_key; struct sctp_shared_key *tmp; if (list_empty(keys)) return; key_for_each_safe(ep_key, tmp, keys) { list_del_init(&ep_key->key_list); sctp_auth_shkey_release(ep_key); } } /* Compare two byte vectors as numbers. 
Return values * are: * 0 - vectors are equal * < 0 - vector 1 is smaller than vector 2 * > 0 - vector 1 is greater than vector 2 * * Algorithm is: * This is performed by selecting the numerically smaller key vector... * If the key vectors are equal as numbers but differ in length ... * the shorter vector is considered smaller * * Examples (with small values): * 000123456789 > 123456789 (first number is longer) * 000123456789 < 234567891 (second number is larger numerically) * 123456789 > 2345678 (first number is both larger & longer) */ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, struct sctp_auth_bytes *vector2) { int diff; int i; const __u8 *longer; diff = vector1->len - vector2->len; if (diff) { longer = (diff > 0) ? vector1->data : vector2->data; /* Check to see if the longer number is * lead-zero padded. If it is not, it * is automatically larger numerically. */ for (i = 0; i < abs(diff); i++) { if (longer[i] != 0) return diff; } } /* lengths are the same, compare numbers */ return memcmp(vector1->data, vector2->data, vector1->len); } /* * Create a key vector as described in SCTP-AUTH, Section 6.1 * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO * parameter sent by each endpoint are concatenated as byte vectors. * These parameters include the parameter type, parameter length, and * the parameter value, but padding is omitted; all padding MUST be * removed from this concatenation before proceeding with further * computation of keys. Parameters which were not sent are simply * omitted from the concatenation process. The resulting two vectors * are called the two key vectors. */ static struct sctp_auth_bytes *sctp_auth_make_key_vector( struct sctp_random_param *random, struct sctp_chunks_param *chunks, struct sctp_hmac_algo_param *hmacs, gfp_t gfp) { struct sctp_auth_bytes *new; __u32 len; __u32 offset = 0; __u16 random_len, hmacs_len, chunks_len = 0; random_len = ntohs(random->param_hdr.length); hmacs_len = ntohs(hmacs->param_hdr.length); if (chunks) chunks_len = ntohs(chunks->param_hdr.length); len = random_len + hmacs_len + chunks_len; new = sctp_auth_create_key(len, gfp); if (!new) return NULL; memcpy(new->data, random, random_len); offset += random_len; if (chunks) { memcpy(new->data + offset, chunks, chunks_len); offset += chunks_len; } memcpy(new->data + offset, hmacs, hmacs_len); return new; } /* Make a key vector based on our local parameters */ static struct sctp_auth_bytes *sctp_auth_make_local_vector( const struct sctp_association *asoc, gfp_t gfp) { return sctp_auth_make_key_vector( (struct sctp_random_param *)asoc->c.auth_random, (struct sctp_chunks_param *)asoc->c.auth_chunks, (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp); } /* Make a key vector based on peer's parameters */ static struct sctp_auth_bytes *sctp_auth_make_peer_vector( const struct sctp_association *asoc, gfp_t gfp) { return sctp_auth_make_key_vector(asoc->peer.peer_random, asoc->peer.peer_chunks, asoc->peer.peer_hmacs, gfp); } /* Set the value of the association shared key based on the parameters * given. The algorithm is: * From the endpoint pair shared keys and the key vectors the * association shared keys are computed. This is performed by selecting * the numerically smaller key vector and concatenating it to the * endpoint pair shared key, and then concatenating the numerically * larger key vector to that. The result of the concatenation is the * association shared key. 
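 *
 * Schematically: with endpoint pair key K and key vectors V1 and V2,
 * V1 being the numerically smaller one, the bytes laid out are
 * K || V1 || V2 (K is empty for the NULL key of key id 0).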
*/ static struct sctp_auth_bytes *sctp_auth_asoc_set_secret( struct sctp_shared_key *ep_key, struct sctp_auth_bytes *first_vector, struct sctp_auth_bytes *last_vector, gfp_t gfp) { struct sctp_auth_bytes *secret; __u32 offset = 0; __u32 auth_len; auth_len = first_vector->len + last_vector->len; if (ep_key->key) auth_len += ep_key->key->len; secret = sctp_auth_create_key(auth_len, gfp); if (!secret) return NULL; if (ep_key->key) { memcpy(secret->data, ep_key->key->data, ep_key->key->len); offset += ep_key->key->len; } memcpy(secret->data + offset, first_vector->data, first_vector->len); offset += first_vector->len; memcpy(secret->data + offset, last_vector->data, last_vector->len); return secret; } /* Create an association shared key. Follow the algorithm * described in SCTP-AUTH, Section 6.1 */ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( const struct sctp_association *asoc, struct sctp_shared_key *ep_key, gfp_t gfp) { struct sctp_auth_bytes *local_key_vector; struct sctp_auth_bytes *peer_key_vector; struct sctp_auth_bytes *first_vector, *last_vector; struct sctp_auth_bytes *secret = NULL; int cmp; /* Now we need to build the key vectors * SCTP-AUTH, Section 6.1 * The RANDOM parameter, the CHUNKS parameter and the HMAC-ALGO * parameter sent by each endpoint are concatenated as byte vectors. * These parameters include the parameter type, parameter length, and * the parameter value, but padding is omitted; all padding MUST be * removed from this concatenation before proceeding with further * computation of keys. Parameters which were not sent are simply * omitted from the concatenation process. The resulting two vectors * are called the two key vectors. */ local_key_vector = sctp_auth_make_local_vector(asoc, gfp); peer_key_vector = sctp_auth_make_peer_vector(asoc, gfp); if (!peer_key_vector || !local_key_vector) goto out; /* Figure out the order in which the key_vectors will be * added to the endpoint shared key. * SCTP-AUTH, Section 6.1: * This is performed by selecting the numerically smaller key * vector and concatenating it to the endpoint pair shared * key, and then concatenating the numerically larger key * vector to that. If the key vectors are equal as numbers * but differ in length, then the concatenation order is the * endpoint shared key, followed by the shorter key vector, * followed by the longer key vector. Otherwise, the key * vectors are identical, and may be concatenated to the * endpoint pair key in any order. */ cmp = sctp_auth_compare_vectors(local_key_vector, peer_key_vector); if (cmp < 0) { first_vector = local_key_vector; last_vector = peer_key_vector; } else { first_vector = peer_key_vector; last_vector = local_key_vector; } secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, gfp); out: sctp_auth_key_put(local_key_vector); sctp_auth_key_put(peer_key_vector); return secret; } /* * Populate the association overlay list with the list * from the endpoint. 
*/ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, struct sctp_association *asoc, gfp_t gfp) { struct sctp_shared_key *sh_key; struct sctp_shared_key *new; BUG_ON(!list_empty(&asoc->endpoint_shared_keys)); key_for_each(sh_key, &ep->endpoint_shared_keys) { new = sctp_auth_shkey_create(sh_key->key_id, gfp); if (!new) goto nomem; new->key = sh_key->key; sctp_auth_key_hold(new->key); list_add(&new->key_list, &asoc->endpoint_shared_keys); } return 0; nomem: sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); return -ENOMEM; } /* Public interface to create the association shared key. * See code above for the algorithm. */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; struct sctp_chunk *chunk; /* If we don't support AUTH, or the peer is not capable, * we don't need to do anything. */ if (!asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an * endpoint pair shared key, we can't compute the * secret. * For key_id 0, endpoint pair shared key is a NULL key. */ ep_key = sctp_auth_get_shkey(asoc, asoc->active_key_id); BUG_ON(!ep_key); secret = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); if (!secret) return -ENOMEM; sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; asoc->shkey = ep_key; /* Update send queue in case any chunk already in there now * needs authenticating */ list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) { chunk->auth = 1; if (!chunk->shkey) { chunk->shkey = asoc->shkey; sctp_auth_shkey_hold(chunk->shkey); } } } return 0; } /* Find the endpoint pair shared key based on the key_id */ struct sctp_shared_key *sctp_auth_get_shkey( const struct sctp_association *asoc, __u16 key_id) { struct sctp_shared_key *key; /* First search the association's set of endpoint pair shared keys */ key_for_each(key, &asoc->endpoint_shared_keys) { if (key->key_id == key_id) { if (!key->deactivated) return key; break; } } return NULL; } const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id) { return &sctp_hmac_list[hmac_id]; } /* Get the HMAC description information that we can use to build * the AUTH chunk */ const struct sctp_hmac * sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) { struct sctp_hmac_algo_param *hmacs; __u16 n_elt; __u16 id = 0; int i; /* If we have a default entry, use it */ if (asoc->default_hmac_id) return &sctp_hmac_list[asoc->default_hmac_id]; /* Since we do not have a default entry, find the first entry * we support and return that. Do not cache that id. 
*/ hmacs = asoc->peer.peer_hmacs; if (!hmacs) return NULL; n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(struct sctp_paramhdr)) >> 1; for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); if (sctp_hmac_supported(id)) return &sctp_hmac_list[id]; } return NULL; } static int __sctp_auth_find_hmacid(__be16 *hmacs, int n_elts, __be16 hmac_id) { int found = 0; int i; for (i = 0; i < n_elts; i++) { if (hmac_id == hmacs[i]) { found = 1; break; } } return found; } /* See if the HMAC_ID is one that we claim as supported */ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, __be16 hmac_id) { struct sctp_hmac_algo_param *hmacs; __u16 n_elt; if (!asoc) return 0; hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs; n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(struct sctp_paramhdr)) >> 1; return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id); } /* Cache the default HMAC id. This is to follow this text from SCTP-AUTH: * Section 6.1: * The receiver of a HMAC-ALGO parameter SHOULD use the first listed * algorithm it supports. */ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs) { __u16 id; int i; int n_params; /* if the default id is already set, use it */ if (asoc->default_hmac_id) return; n_params = (ntohs(hmacs->param_hdr.length) - sizeof(struct sctp_paramhdr)) >> 1; for (i = 0; i < n_params; i++) { id = ntohs(hmacs->hmac_ids[i]); if (sctp_hmac_supported(id)) { asoc->default_hmac_id = id; break; } } } /* Check to see if the given chunk is supposed to be authenticated */ static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param) { unsigned short len; int found = 0; int i; if (!param || param->param_hdr.length == 0) return 0; len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr); /* SCTP-AUTH, Section 3.2 * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH * chunks MUST NOT be listed in the CHUNKS parameter. However, if * a CHUNKS parameter is received then the types for INIT, INIT-ACK, * SHUTDOWN-COMPLETE and AUTH chunks MUST be ignored. */ for (i = 0; !found && i < len; i++) { switch (param->chunks[i]) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: case SCTP_CID_SHUTDOWN_COMPLETE: case SCTP_CID_AUTH: break; default: if (param->chunks[i] == chunk) found = 1; break; } } return found; } /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); } /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, (struct sctp_chunks_param *)asoc->c.auth_chunks); } /* SCTP-AUTH: Section 6.2: * The sender MUST calculate the MAC as described in RFC2104 [2] using * the hash function H as described by the MAC Identifier and the shared * association key K based on the endpoint pair shared key described by * the shared key identifier. The 'data' used for the computation of * the AUTH-chunk is given by the AUTH chunk with its HMAC field set to * zero (as shown in Figure 6) followed by all chunks that are placed * after the AUTH chunk in the SCTP packet. 
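 *
 * In other words: MAC = HMAC(K, AUTH chunk with its HMAC field zeroed
 * || all chunks following the AUTH chunk).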
*/ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sk_buff *skb, struct sctp_auth_chunk *auth, struct sctp_shared_key *ep_key, gfp_t gfp) { struct sctp_auth_bytes *asoc_key; __u16 key_id, hmac_id; int free_key = 0; size_t data_len; __u8 *digest; /* Extract the info we need: * - hmac id * - key id */ key_id = ntohs(auth->auth_hdr.shkey_id); hmac_id = ntohs(auth->auth_hdr.hmac_id); if (key_id == asoc->active_key_id) asoc_key = asoc->asoc_shared_key; else { /* ep_key can't be NULL here */ asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); if (!asoc_key) return; free_key = 1; } data_len = skb_tail_pointer(skb) - (unsigned char *)auth; digest = (u8 *)(&auth->auth_hdr + 1); if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1) { hmac_sha1_usingrawkey(asoc_key->data, asoc_key->len, (const u8 *)auth, data_len, digest); } else { WARN_ON_ONCE(hmac_id != SCTP_AUTH_HMAC_ID_SHA256); hmac_sha256_usingrawkey(asoc_key->data, asoc_key->len, (const u8 *)auth, data_len, digest); } if (free_key) sctp_auth_key_put(asoc_key); } /* API Helpers */ /* Add a chunk to the endpoint authenticated chunk list */ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id) { struct sctp_chunks_param *p = ep->auth_chunk_list; __u16 nchunks; __u16 param_len; /* If this chunk is already specified, we are done */ if (__sctp_auth_cid(chunk_id, p)) return 0; /* Check if we can add this chunk to the array */ param_len = ntohs(p->param_hdr.length); nchunks = param_len - sizeof(struct sctp_paramhdr); if (nchunks == SCTP_NUM_CHUNK_TYPES) return -EINVAL; p->chunks[nchunks] = chunk_id; p->param_hdr.length = htons(param_len + 1); return 0; } /* Add hmac identifiers to the endpoint list of supported hmac ids */ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, struct sctp_hmacalgo *hmacs) { int has_sha1 = 0; __u16 id; int i; /* Scan the list looking for an unsupported id. Also make sure that * SHA1 is specified. */ for (i = 0; i < hmacs->shmac_num_idents; i++) { id = hmacs->shmac_idents[i]; if (!sctp_hmac_supported(id)) return -EOPNOTSUPP; if (SCTP_AUTH_HMAC_ID_SHA1 == id) has_sha1 = 1; } if (!has_sha1) return -EINVAL; for (i = 0; i < hmacs->shmac_num_idents; i++) ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); ep->auth_hmacs_list->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + hmacs->shmac_num_idents * sizeof(__u16)); return 0; } /* Set a new shared key on either endpoint or association. If a * key with the same ID already exists, replace the key (remove the * old key and add a new one). 
*/ int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_authkey *auth_key) { struct sctp_shared_key *cur_key, *shkey; struct sctp_auth_bytes *key; struct list_head *sh_keys; int replace = 0; /* Try to find the given key id to see if * we are doing a replace, or adding a new key */ if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; sh_keys = &asoc->endpoint_shared_keys; } else { if (!ep->auth_enable) return -EACCES; sh_keys = &ep->endpoint_shared_keys; } key_for_each(shkey, sh_keys) { if (shkey->key_id == auth_key->sca_keynumber) { replace = 1; break; } } cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL); if (!cur_key) return -ENOMEM; /* Create a new key data based on the info passed in */ key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL); if (!key) { kfree(cur_key); return -ENOMEM; } memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; if (!replace) { list_add(&cur_key->key_list, sh_keys); return 0; } list_del_init(&shkey->key_list); list_add(&cur_key->key_list, sh_keys); if (asoc && asoc->active_key_id == auth_key->sca_keynumber && sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) { list_del_init(&cur_key->key_list); sctp_auth_shkey_release(cur_key); list_add(&shkey->key_list, sh_keys); return -ENOMEM; } sctp_auth_shkey_release(shkey); return 0; } int sctp_auth_set_active_key(struct sctp_endpoint *ep, struct sctp_association *asoc, __u16 key_id) { struct sctp_shared_key *key; struct list_head *sh_keys; int found = 0; /* The key identifier MUST correspond to an existing key */ if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; sh_keys = &asoc->endpoint_shared_keys; } else { if (!ep->auth_enable) return -EACCES; sh_keys = &ep->endpoint_shared_keys; } key_for_each(key, sh_keys) { if (key->key_id == key_id) { found = 1; break; } } if (!found || key->deactivated) return -EINVAL; if (asoc) { __u16 active_key_id = asoc->active_key_id; asoc->active_key_id = key_id; if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) { asoc->active_key_id = active_key_id; return -ENOMEM; } } else ep->active_key_id = key_id; return 0; } int sctp_auth_del_key_id(struct sctp_endpoint *ep, struct sctp_association *asoc, __u16 key_id) { struct sctp_shared_key *key; struct list_head *sh_keys; int found = 0; /* The key identifier MUST NOT be the current active key * The key identifier MUST correspond to an existing key */ if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { if (!ep->auth_enable) return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; sh_keys = &ep->endpoint_shared_keys; } key_for_each(key, sh_keys) { if (key->key_id == key_id) { found = 1; break; } } if (!found) return -EINVAL; /* Delete the shared key */ list_del_init(&key->key_list); sctp_auth_shkey_release(key); return 0; } int sctp_auth_deact_key_id(struct sctp_endpoint *ep, struct sctp_association *asoc, __u16 key_id) { struct sctp_shared_key *key; struct list_head *sh_keys; int found = 0; /* The key identifier MUST NOT be the current active key * The key identifier MUST correspond to an existing key */ if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { if (!ep->auth_enable) return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; sh_keys = &ep->endpoint_shared_keys; } key_for_each(key, sh_keys) { if (key->key_id == 
key_id) { found = 1; break; } } if (!found) return -EINVAL; /* refcnt == 1 and !list_empty mean it's not being used anywhere * and deactivated will be set, so it's time to notify userland * that this shkey can be freed. */ if (asoc && !list_empty(&key->key_list) && refcount_read(&key->refcnt) == 1) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, key->key_id, SCTP_AUTH_FREE_KEY, GFP_KERNEL); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } key->deactivated = 1; return 0; } int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. */ if (!ep->auth_hmacs_list) { struct sctp_hmac_algo_param *auth_hmacs; auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, SCTP_AUTH_NUM_HMACS), gfp); if (!auth_hmacs) goto nomem; /* Initialize the HMACS parameter. * SCTP-AUTH: Section 3.3 * Every endpoint supporting SCTP chunk authentication MUST * support the HMAC based on the SHA-1 algorithm. */ auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; auth_hmacs->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + 2); auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); ep->auth_hmacs_list = auth_hmacs; } if (!ep->auth_chunk_list) { struct sctp_chunks_param *auth_chunks; auth_chunks = kzalloc(sizeof(*auth_chunks) + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; /* Initialize the CHUNKS parameter */ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; auth_chunks->param_hdr.length = htons(sizeof(struct sctp_paramhdr)); ep->auth_chunk_list = auth_chunks; } return 0; nomem: /* Free all allocations */ kfree(ep->auth_hmacs_list); kfree(ep->auth_chunk_list); ep->auth_hmacs_list = NULL; ep->auth_chunk_list = NULL; return -ENOMEM; } void sctp_auth_free(struct sctp_endpoint *ep) { kfree(ep->auth_hmacs_list); kfree(ep->auth_chunk_list); ep->auth_hmacs_list = NULL; ep->auth_chunk_list = NULL; }
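/*
 * Self-contained user-space illustration (not part of the original
 * file) of the comparison rule documented before
 * sctp_auth_compare_vectors(): leading zeros do not change the numeric
 * value, a vector with more significant bytes is larger, and vectors
 * that are equal as numbers are ordered shorter-first.
 */
#include <stdio.h>
#include <string.h>

static int vec_cmp(const unsigned char *a, size_t alen,
		   const unsigned char *b, size_t blen)
{
	size_t an = alen, bn = blen;
	int r;

	/* Skip leading zeros; they do not affect the numeric value. */
	while (an && *a == 0) { a++; an--; }
	while (bn && *b == 0) { b++; bn--; }

	if (an != bn)		/* more significant bytes wins */
		return an > bn ? 1 : -1;

	r = memcmp(a, b, an);
	if (r)
		return r;

	/* Equal as numbers: the shorter original vector is smaller. */
	return (alen > blen) - (alen < blen);
}

int main(void)
{
	const unsigned char v1[] = { 0x00, 0x01, 0x23 };
	const unsigned char v2[] = { 0x01, 0x23 };
	const unsigned char v3[] = { 0x02, 0x34 };

	printf("%d\n", vec_cmp(v1, 3, v2, 2)); /* > 0: equal value, v1 longer */
	printf("%d\n", vec_cmp(v1, 3, v3, 2)); /* < 0: 0x0123 < 0x0234 */
	return 0;
}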
758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 | // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 BSD socket options interface * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on 
linux/net/ipv4/ip_sockglue.c * * FIXME: Make the setsockopt code POSIX compliant: That is * * o Truncate getsockopt returns * o Return an optlen of the truncated length if need be * * Changes: * David L Stevens <dlstevens@us.ibm.com>: * - added multicast source filtering API for MLDv2 */ #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/mroute6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/netfilter.h> #include <linux/slab.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/inet_common.h> #include <net/tcp.h> #include <net/udp.h> #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> #include <net/seg6.h> #include <net/psp.h> #include <linux/uaccess.h> struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount); int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW) return -ENOPROTOOPT; new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; if (sel >= 0 && !new_ra) return -ENOMEM; write_lock_bh(&ip6_ra_lock); for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (sel >= 0) { write_unlock_bh(&ip6_ra_lock); kfree(new_ra); return -EADDRINUSE; } *rap = ra->next; write_unlock_bh(&ip6_ra_lock); sock_put(sk); kfree(ra); return 0; } } if (!new_ra) { write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->sel = sel; new_ra->next = ra; *rap = new_ra; sock_hold(sk); write_unlock_bh(&ip6_ra_lock); return 0; } struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt) { if (inet_test_bit(IS_ICSK, sk)) { if (opt && !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) { struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ext_hdr_len = psp_sk_overhead(sk) + opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt))); sk_dst_reset(sk); return opt; } static int copy_group_source_from_sockptr(struct group_source_req *greqs, sockptr_t optval, int optlen) { if (in_compat_syscall()) { struct compat_group_source_req gr32; if (optlen < sizeof(gr32)) return -EINVAL; if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; greqs->gsr_interface = gr32.gsr_interface; greqs->gsr_group = gr32.gsr_group; greqs->gsr_source = gr32.gsr_source; } else { if (optlen < sizeof(*greqs)) return -EINVAL; if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) return -EFAULT; } return 0; } static int do_ipv6_mcast_group_source(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct group_source_req greqs; int omode, add; int ret; ret = copy_group_source_from_sockptr(&greqs, optval, optlen); if (ret) return ret; if (greqs.gsr_group.ss_family != AF_INET6 || greqs.gsr_source.ss_family != AF_INET6) return -EADDRNOTAVAIL; if (optname == MCAST_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == MCAST_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; 
add = 0; } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct sockaddr_in6 *psin6; int retv; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, &psin6->sin6_addr, MCAST_INCLUDE); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) return retv; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } return ip6_mc_source(add, omode, sk, &greqs); } static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { struct group_filter *gsf; int ret; if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); if (IS_ERR(gsf)) return PTR_ERR(gsf); /* numsrc >= (4G-140)/128 overflow in 32 bits */ ret = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffffU || gsf->gf_numsrc > sysctl_mld_max_msf) goto out_free_gsf; ret = -EINVAL; if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; } static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; int n; if (optlen < size0) return -EINVAL; if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; /* numsrc >= (4G-140)/128 overflow in 32 bits */ ret = -ENOBUFS; n = gf32->gf_numsrc; if (n >= 0x1ffffffU || n > sysctl_mld_max_msf) goto out_free_p; ret = -EINVAL; if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); return ret; } static int ipv6_mcast_join_leave(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct sockaddr_in6 *psin6; struct group_req greq; if (optlen < sizeof(greq)) return -EINVAL; if (copy_from_sockptr(&greq, optval, sizeof(greq))) return -EFAULT; if (greq.gr_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) return ipv6_sock_mc_join(sk, greq.gr_interface, &psin6->sin6_addr); return ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); } static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct compat_group_req gr32; struct sockaddr_in6 *psin6; if (optlen < sizeof(gr32)) return -EINVAL; if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; if (gr32.gr_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; psin6 = (struct sockaddr_in6 *)&gr32.gr_group; if (optname == MCAST_JOIN_GROUP) return ipv6_sock_mc_join(sk, gr32.gr_interface, &psin6->sin6_addr); return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr); } static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_opt_hdr *new = NULL; struct net *net = sock_net(sk); struct ipv6_txoptions *opt; int err; /* hop-by-hop / destination options are privileged option */ if (optname 
!= IPV6_RTHDR && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; /* remove any sticky options header with a zero option * length, per RFC3542. */ if (optlen > 0) { if (sockptr_is_null(optval)) return -EINVAL; if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) return -EINVAL; new = memdup_sockptr(optval, optlen); if (IS_ERR(new)) return PTR_ERR(new); if (unlikely(ipv6_optlen(new) > optlen)) { kfree(new); return -EINVAL; } } opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, new); kfree(new); if (IS_ERR(opt)) return PTR_ERR(opt); /* routing header option needs extra check */ err = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) goto sticky_done; break; #endif case IPV6_SRCRT_TYPE_4: { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; if (!seg6_validate_srh(srh, optlen, false)) goto sticky_done; break; } default: goto sticky_done; } } err = 0; opt = ipv6_update_options(sk, opt); sticky_done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } return err; } int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int retv = -ENOPROTOOPT; int val, valbool; if (sockptr_is_null(optval)) val = 0; else { if (optlen >= sizeof(int)) { if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; } else val = 0; } valbool = (val != 0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); /* Handle options that can be set without locking the socket. */ switch (optname) { case IPV6_UNICAST_HOPS: if (optlen < sizeof(int)) return -EINVAL; if (val > 255 || val < -1) return -EINVAL; WRITE_ONCE(np->hop_limit, val); return 0; case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) return -EINVAL; if (val != valbool) return -EINVAL; inet6_assign_bit(MC6_LOOP, sk, valbool); return 0; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) return retv; if (optlen < sizeof(int)) return -EINVAL; if (val > 255 || val < -1) return -EINVAL; WRITE_ONCE(np->mcast_hops, val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); return 0; case IPV6_MTU: if (optlen < sizeof(int)) return -EINVAL; if (val && val < IPV6_MIN_MTU) return -EINVAL; WRITE_ONCE(np->frag_size, val); return 0; case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) return -EINVAL; if (val < 0 || val > 255) return -EINVAL; if (val) static_branch_enable(&ip6_min_hopcount); /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount * while we are changing it. 
*/ WRITE_ONCE(np->min_hopcount, val); return 0; case IPV6_RECVERR_RFC4884: if (optlen < sizeof(int)) return -EINVAL; if (val < 0 || val > 1) return -EINVAL; inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); return 0; case IPV6_MULTICAST_ALL: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(MC6_ALL, sk, valbool); return 0; case IPV6_AUTOFLOWLABEL: inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); inet6_set_bit(AUTOFLOWLABEL_SET, sk); return 0; case IPV6_DONTFRAG: inet6_assign_bit(DONTFRAG, sk, valbool); return 0; case IPV6_RECVERR: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(RECVERR6, sk, valbool); if (!val) skb_errqueue_purge(&sk->sk_error_queue); return 0; case IPV6_ROUTER_ALERT_ISOLATE: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); return 0; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) return -EINVAL; if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) return -EINVAL; WRITE_ONCE(np->pmtudisc, val); return 0; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(SNDFLOW, sk, valbool); return 0; case IPV6_ADDR_PREFERENCES: if (optlen < sizeof(int)) return -EINVAL; return ip6_sock_set_addr_preferences(sk, val); case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) return -ENOPROTOOPT; if (optlen < sizeof(int)) return -EINVAL; if (val) { struct net_device *dev; int bound_dev_if, midx; rcu_read_lock(); dev = dev_get_by_index_rcu(net, val); if (!dev) { rcu_read_unlock(); return -ENODEV; } midx = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (bound_dev_if && bound_dev_if != val && (!midx || midx != bound_dev_if)) return -EINVAL; } WRITE_ONCE(np->mcast_oif, val); return 0; case IPV6_UNICAST_IF: { struct net_device *dev; int ifindex; if (optlen != sizeof(int)) return -EINVAL; ifindex = (__force int)ntohl((__force __be32)val); if (!ifindex) { WRITE_ONCE(np->ucast_oif, 0); return 0; } dev = dev_get_by_index(net, ifindex); if (!dev) return -EADDRNOTAVAIL; dev_put(dev); if (READ_ONCE(sk->sk_bound_dev_if)) return -EINVAL; WRITE_ONCE(np->ucast_oif, ifindex); return 0; } } sockopt_lock_sock(sk); /* Another thread has converted the socket into IPv4 with * IPV6_ADDRFORM concurrently. 
*/ if (unlikely(sk->sk_family != AF_INET6)) goto unlock; switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { if (sk->sk_type == SOCK_RAW) break; if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET6) { retv = -EBUSY; break; } } else if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_prot != &tcpv6_prot) { retv = -EBUSY; break; } } else { break; } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; } if (ipv6_only_sock(sk) || !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ WRITE_ONCE(sk->sk_prot, &tcp_prot); /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops); WRITE_ONCE(sk->sk_family, PF_INET); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct proto *prot = &udp_prot; if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ WRITE_ONCE(sk->sk_prot, prot); WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops); WRITE_ONCE(sk->sk_family, PF_INET); } /* Disable all options not to allocate memory anymore, * but there is still a race. See the lockless path * in udpv6_sendmsg() and ipv6_local_rxpmtu(). */ np->rxopt.all = 0; inet6_cleanup_sock(sk); module_put(THIS_MODULE); retv = 0; break; } goto e_inval; case IPV6_V6ONLY: if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; sk->sk_ipv6only = valbool; retv = 0; break; case IPV6_RECVPKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxinfo = valbool; retv = 0; break; case IPV6_2292PKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxoinfo = valbool; retv = 0; break; case IPV6_RECVHOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxhlim = valbool; retv = 0; break; case IPV6_2292HOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxohlim = valbool; retv = 0; break; case IPV6_RECVRTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.osrcrt = valbool; retv = 0; break; case IPV6_RECVHOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.hopopts = valbool; retv = 0; break; case IPV6_2292HOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.ohopopts = valbool; retv = 0; break; case IPV6_RECVDSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.dstopts = valbool; retv = 0; break; case IPV6_2292DSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.odstopts = valbool; retv = 0; break; case IPV6_TCLASS: if (optlen < sizeof(int)) goto e_inval; if (val < -1 || val > 0xff) goto e_inval; /* RFC 3542, 6.5: default traffic class of 0x0 */ if (val == -1) val = 0; if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; val |= np->tclass & INET_ECN_MASK; } if (np->tclass != val) { np->tclass = val; sk_dst_reset(sk); } retv = 0; break; case IPV6_RECVTCLASS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxtclass = valbool; retv = 0; break; case IPV6_FLOWINFO: 
if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxflow = valbool; retv = 0; break; case IPV6_RECVPATHMTU: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxpmtu = valbool; retv = 0; break; case IPV6_TRANSPARENT: if (valbool && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW) && !sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN)) { retv = -EPERM; break; } if (optlen < sizeof(int)) goto e_inval; /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ inet_assign_bit(TRANSPARENT, sk, valbool); retv = 0; break; case IPV6_FREEBIND: if (optlen < sizeof(int)) goto e_inval; /* we also don't have a separate freebind bit for IPV6 */ inet_assign_bit(FREEBIND, sk, valbool); retv = 0; break; case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxorigdstaddr = valbool; retv = 0; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: retv = ipv6_set_opt_hdr(sk, optname, optval, optlen); break; case IPV6_PKTINFO: { struct in6_pktinfo pkt; if (optlen == 0) goto e_inval; else if (optlen < sizeof(struct in6_pktinfo) || sockptr_is_null(optval)) goto e_inval; if (copy_from_sockptr(&pkt, optval, sizeof(pkt))) { retv = -EFAULT; break; } if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex)) goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; retv = 0; break; } case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; struct ipcm6_cookie ipc6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; /* 1K is probably excessive * 1K is surely not enough, 2K per standard header is 16K. */ retv = -EINVAL; if (optlen > 64*1024) break; opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; if (!opt) break; memset(opt, 0, sizeof(*opt)); refcount_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_sockptr(opt + 1, optval, optlen)) goto done; msg.msg_controllen = optlen; msg.msg_control_is_user = false; msg.msg_control = (void *)(opt+1); ipc6.opt = opt; retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6); if (retv) goto done; update: retv = 0; opt = ipv6_update_options(sk, opt); done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } break; } case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EPROTO; if (inet_test_bit(IS_ICSK, sk)) break; retv = -EFAULT; if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_ADD_MEMBERSHIP) retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: case IPV6_LEAVE_ANYCAST: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EFAULT; if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_JOIN_ANYCAST) retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); else retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) retv = compat_ipv6_mcast_join_leave(sk, optname, optval, optlen); else retv = ipv6_mcast_join_leave(sk, optname, optval, optlen); break; case MCAST_JOIN_SOURCE_GROUP: case 
MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: retv = do_ipv6_mcast_group_source(sk, optname, optval, optlen); break; case MCAST_MSFILTER: if (in_compat_syscall()) retv = compat_ipv6_set_mcast_msfilter(sk, optval, optlen); else retv = ipv6_set_mcast_msfilter(sk, optval, optlen); break; case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); if (retv == 0) inet6_assign_bit(RTALERT, sk, valbool); break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; if (!sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN)) break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; } unlock: sockopt_release_sock(sk); return retv; e_inval: retv = -EINVAL; goto unlock; } int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { int err; if (level == SOL_IP && sk->sk_type != SOCK_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && optname != IPV6_XFRM_POLICY) err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } EXPORT_SYMBOL(ipv6_setsockopt); static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, int optname, sockptr_t optval, int len) { struct ipv6_opt_hdr *hdr; if (!opt) return 0; switch (optname) { case IPV6_HOPOPTS: hdr = opt->hopopt; break; case IPV6_RTHDRDSTOPTS: hdr = opt->dst0opt; break; case IPV6_RTHDR: hdr = (struct ipv6_opt_hdr *)opt->srcrt; break; case IPV6_DSTOPTS: hdr = opt->dst1opt; break; default: return -EINVAL; /* should not happen */ } if (!hdr) return 0; len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_sockptr(optval, hdr, len)) return -EFAULT; return len; } static int ipv6_get_msfilter(struct sock *sk, sockptr_t optval, sockptr_t optlen, int len) { const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter gsf; int num; int err; if (len < size0) return -EINVAL; if (copy_from_sockptr(&gsf, optval, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; num = gsf.gf_numsrc; sockopt_lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, optval, size0); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; len = GROUP_FILTER_SIZE(num); if (copy_to_sockptr(optlen, &len, sizeof(int)) || copy_to_sockptr(optval, &gsf, size0)) err = -EFAULT; } sockopt_release_sock(sk); return err; } static int compat_ipv6_get_msfilter(struct sock *sk, sockptr_t optval, sockptr_t optlen, int len) { const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter gf32; struct group_filter gf; int err; int num; if (len < size0) return -EINVAL; if (copy_from_sockptr(&gf32, optval, size0)) return -EFAULT; gf.gf_interface = gf32.gf_interface; gf.gf_fmode = gf32.gf_fmode; num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; if (gf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; sockopt_lock_sock(sk); err = ip6_mc_msfget(sk, &gf, optval, size0); sockopt_release_sock(sk); if (err) return err; if (num > gf.gf_numsrc) num = gf.gf_numsrc; len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); if 
(copy_to_sockptr(optlen, &len, sizeof(int)) || copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode), &gf.gf_fmode, sizeof(gf32.gf_fmode)) || copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc), &gf.gf_numsrc, sizeof(gf32.gf_numsrc))) return -EFAULT; return 0; } int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; int val; if (ip6_mroute_opt(optname)) return ip6_mroute_getsockopt(sk, optname, optval, optlen); if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; break; case MCAST_MSFILTER: if (in_compat_syscall()) return compat_ipv6_get_msfilter(sk, optval, optlen, len); return ipv6_get_msfilter(sk, optval, optlen, len); case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; if (optval.is_kernel) { msg.msg_control_is_user = false; msg.msg_control = optval.kernel; } else { msg.msg_control_is_user = true; msg.msg_control_user = optval.user; } msg.msg_controllen = len; msg.msg_flags = 0; sockopt_lock_sock(sk); skb = np->pktoptions; if (skb) ip6_datagram_recv_ctl(sk, &msg, skb); sockopt_release_sock(sk); if (!skb) { if (np->rxopt.bits.rxinfo) { int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { int hlim = READ_ONCE(np->mcast_hops); put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { int tclass = (int)ip6_tclass(np->rcv_flowinfo); put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; src_info.ipi6_addr = mcast_oif ? 
sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { int hlim = READ_ONCE(np->mcast_hops); put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { __be32 flowinfo = np->rcv_flowinfo; put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } } len -= msg.msg_controllen; return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_MTU: { struct dst_entry *dst; val = 0; rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = dst_mtu(dst); rcu_read_unlock(); if (!val) return -ENOTCONN; break; } case IPV6_V6ONLY: val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; case IPV6_2292PKTINFO: val = np->rxopt.bits.rxoinfo; break; case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; case IPV6_2292HOPLIMIT: val = np->rxopt.bits.rxohlim; break; case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; case IPV6_2292RTHDR: val = np->rxopt.bits.osrcrt; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; sockopt_lock_sock(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); sockopt_release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) return len; return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; case IPV6_2292HOPOPTS: val = np->rxopt.bits.ohopopts; break; case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; case IPV6_2292DSTOPTS: val = np->rxopt.bits.odstopts; break; case IPV6_TCLASS: val = np->tclass; break; case IPV6_RECVTCLASS: val = np->rxopt.bits.rxtclass; break; case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; case IPV6_RECVPATHMTU: val = np->rxopt.bits.rxpmtu; break; case IPV6_PATHMTU: { struct dst_entry *dst; struct ip6_mtuinfo mtuinfo; if (len < sizeof(mtuinfo)) return -EINVAL; len = sizeof(mtuinfo); memset(&mtuinfo, 0, sizeof(mtuinfo)); rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) mtuinfo.ip6m_mtu = dst_mtu(dst); rcu_read_unlock(); if (!mtuinfo.ip6m_mtu) return -ENOTCONN; if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &mtuinfo, len)) return -EFAULT; return 0; } case IPV6_TRANSPARENT: val = inet_test_bit(TRANSPARENT, sk); break; case IPV6_FREEBIND: val = inet_test_bit(FREEBIND, sk); break; case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) val = READ_ONCE(np->hop_limit); else val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = ip6_dst_hoplimit(dst); rcu_read_unlock(); } if (val < 0) val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } case IPV6_MULTICAST_LOOP: val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: val = READ_ONCE(np->mcast_oif); break; case IPV6_MULTICAST_ALL: val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif)); break; case IPV6_MTU_DISCOVER: val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: { struct in6_flowlabel_req freq; int flags; if (len < sizeof(freq)) return -EINVAL; 
if (copy_from_sockptr(&freq, optval, sizeof(freq))) return -EFAULT; if (freq.flr_action != IPV6_FL_A_GET) return -EINVAL; len = sizeof(freq); flags = freq.flr_flags; memset(&freq, 0, sizeof(freq)); val = ipv6_flowlabel_opt_get(sk, &freq, flags); if (val < 0) return val; if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &freq, len)) return -EFAULT; return 0; } case IPV6_ADDR_PREFERENCES: { u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? */ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; } case IPV6_MINHOPCOUNT: val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: val = np->rxopt.bits.recvfragsize; break; case IPV6_ROUTER_ALERT: val = inet6_test_bit(RTALERT, sk); break; case IPV6_ROUTER_ALERT_ISOLATE: val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, len)) return -EFAULT; return 0; } int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { int err; if (level == SOL_IP && sk->sk_type != SOCK_RAW) return ip_getsockopt(sk, level, optname, optval, optlen); if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { int len; if (get_user(len, optlen)) return -EFAULT; err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } #endif return err; } EXPORT_SYMBOL(ipv6_getsockopt); |
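Aside (illustration, not part of the file above): the split between the lockless fast path and the lock_sock path in do_ipv6_setsockopt() is invisible to applications; both are reached through plain setsockopt()/getsockopt() at the IPPROTO_IPV6 level. A minimal sketch using only standard options handled by the code above:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sd = socket(AF_INET6, SOCK_STREAM, 0);
	int hops = 64, tclass = -1;
	socklen_t len = sizeof(hops);

	if (sd < 0)
		return 1;

	/* IPV6_UNICAST_HOPS is handled on the lockless path above;
	 * values outside -1..255 are rejected with EINVAL. */
	if (setsockopt(sd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
		       &hops, sizeof(hops)))
		perror("IPV6_UNICAST_HOPS");

	/* IPV6_TCLASS takes the locked path; per RFC 3542, -1 restores
	 * the default traffic class of 0. */
	if (setsockopt(sd, IPPROTO_IPV6, IPV6_TCLASS,
		       &tclass, sizeof(tclass)))
		perror("IPV6_TCLASS");

	if (!getsockopt(sd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hops, &len))
		printf("hop limit: %d\n", hops);

	close(sd);
	return 0;
}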
/* * xt_time * Copyright © CC Computer Consultants GmbH, 2007 * * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> * This is a module which is used for time matching * It is using some modified code from dietlibc (localtime() function) * that you can find at https://www.fefe.de/dietlibc/ * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ktime.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/types.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_time.h> struct xtm { u_int8_t month; /* (1-12) */ u_int8_t monthday; /* (1-31) */ u_int8_t weekday; /* (1-7) */ u_int8_t hour; /* (0-23) */ u_int8_t minute; /* (0-59) */ u_int8_t second; /* (0-59) */ unsigned int dse; }; extern struct timezone sys_tz; /* ouch */ static const u_int16_t days_since_year[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, }; static const u_int16_t days_since_leapyear[] = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, }; /* * Since time progresses forward, it is best to organize this array in reverse, * to minimize lookup time. */ enum { DSE_FIRST = 2039, SECONDS_PER_DAY = 86400, }; static const u_int16_t days_since_epoch[] = { /* 2039 - 2030 */ 25202, 24837, 24472, 24106, 23741, 23376, 23011, 22645, 22280, 21915, /* 2029 - 2020 */ 21550, 21184, 20819, 20454, 20089, 19723, 19358, 18993, 18628, 18262, /* 2019 - 2010 */ 17897, 17532, 17167, 16801, 16436, 16071, 15706, 15340, 14975, 14610, /* 2009 - 2000 */ 14245, 13879, 13514, 13149, 12784, 12418, 12053, 11688, 11323, 10957, /* 1999 - 1990 */ 10592, 10227, 9862, 9496, 9131, 8766, 8401, 8035, 7670, 7305, /* 1989 - 1980 */ 6940, 6574, 6209, 5844, 5479, 5113, 4748, 4383, 4018, 3652, /* 1979 - 1970 */ 3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0, }; static inline bool is_leap(unsigned int y) { return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); } /* * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp. * Since we match against days and daytime, the SSTE value needs to be * computed back into human-readable dates. * * This is done in three separate functions so that the most expensive * calculations are done last, in case a "simple match" can be found earlier. */ static inline unsigned int localtime_1(struct xtm *r, time64_t time) { unsigned int v, w; /* Each day has 86400s, so finding the hour/minute is actually easy. */ div_u64_rem(time, SECONDS_PER_DAY, &v); r->second = v % 60; w = v / 60; r->minute = w % 60; r->hour = w / 60; return v; } static inline void localtime_2(struct xtm *r, time64_t time) { /* * Here comes the rest (weekday, monthday). First, divide the SSTE * by seconds-per-day to get the number of _days_ since the epoch. */ r->dse = div_u64(time, SECONDS_PER_DAY); /* * 1970-01-01 (w=0) was a Thursday (4). * -1 and +1 map Sunday properly onto 7. */ r->weekday = (4 + r->dse - 1) % 7 + 1; } static void localtime_3(struct xtm *r, time64_t time) { unsigned int year, i, w = r->dse; /* * In each year, a certain number of days-since-the-epoch have passed. * Find the year that is closest to said days. * * Consider, for example, w=21612 (2029-03-04). Loop will abort on * dse[i] <= w, which happens when dse[i] == 21550. This implies * year == 2029. w will then be 62. */ for (i = 0, year = DSE_FIRST; days_since_epoch[i] > w; ++i, --year) /* just loop */; w -= days_since_epoch[i]; /* * By now we have the current year, and the day of the year. * r->yearday = w; * * On to finding the month (like above). In each month, a certain * number of days-since-New Year have passed, and find the closest * one. * * Consider w=62 (in a non-leap year). Loop will abort on * dsy[i] < w, which happens when dsy[i] == 31+28 (i == 2). * Concludes i == 2, i.e. 3rd month => March. * * (A different approach to use would be to subtract a monthlength * from w repeatedly while counting.) */ if (is_leap(year)) { /* use days_since_leapyear[] in a leap year */ for (i = ARRAY_SIZE(days_since_leapyear) - 1; i > 0 && days_since_leapyear[i] > w; --i) /* just loop */; r->monthday = w - days_since_leapyear[i] + 1; } else { for (i = ARRAY_SIZE(days_since_year) - 1; i > 0 && days_since_year[i] > w; --i) /* just loop */; r->monthday = w - days_since_year[i] + 1; } r->month = i + 1; } static bool time_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; struct xtm current_time; time64_t stamp; /* * We need real time here, but we can neither use skb->tstamp * nor __net_timestamp(). * * skb->tstamp and skb->skb_mstamp_ns overlap, however, they * use different clock types (real vs monotonic). * * Suppose you have two rules: * 1. match before 13:00 * 2. match after 13:00 * * If you match against processing time (ktime_get_real_seconds) it * may happen that the same packet matches both rules if * it arrived at the right moment before 13:00, so it would be * better to check skb->tstamp and set it via __net_timestamp() * if needed. This however breaks outgoing packets tx timestamp, * and causes them to get delayed forever by fq packet scheduler. */ stamp = ktime_get_real_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ stamp -= 60 * sys_tz.tz_minuteswest; /* * xt_time will match when _all_ of the following hold: * - 'now' is in the global time range date_start..date_end * - 'now' is in the monthday mask * - 'now' is in the weekday mask * - 'now' is in the daytime range time_start..time_end * (and by default, libxt_time will set these so as to match) * * note: info->date_start/stop are unsigned 32-bit values that * can hold values beyond y2038, but not after y2106. */ if (stamp < info->date_start || stamp > info->date_stop) return false; packet_time = localtime_1(&current_time, stamp); if (info->daytime_start < info->daytime_stop) { if (packet_time < info->daytime_start || packet_time > info->daytime_stop) return false; } else { if (packet_time < info->daytime_start && packet_time > info->daytime_stop) return false; /** if user asked to ignore 'next day', then e.g. * '1 PM Wed, August 1st' should be treated * like 'Tue 1 PM July 31st'. * * This also causes * 'Monday, "23:00 to 01:00", to match for 2 hours, starting * Monday 23:00 to Tuesday 01:00. */ if ((info->flags & XT_TIME_CONTIGUOUS) && packet_time <= info->daytime_stop) stamp -= SECONDS_PER_DAY; } localtime_2(&current_time, stamp); if (!(info->weekdays_match & (1 << current_time.weekday))) return false; /* Do not spend time computing monthday if all days match anyway */ if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) { localtime_3(&current_time, stamp); if (!(info->monthdays_match & (1 << current_time.monthday))) return false; } return true; } static int time_mt_check(const struct xt_mtchk_param *par) { const struct xt_time_info *info = par->matchinfo; if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n"); return -EDOM; } if (info->flags & ~XT_TIME_ALL_FLAGS) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); return -EINVAL; } if ((info->flags & XT_TIME_CONTIGUOUS) && info->daytime_start < info->daytime_stop) return -EINVAL; return 0; } static struct xt_match xt_time_mt_reg __read_mostly = { .name = "time", .family = NFPROTO_UNSPEC, .match = time_mt, .checkentry = time_mt_check, .matchsize = sizeof(struct xt_time_info), .me = THIS_MODULE, }; static int __init time_mt_init(void) { int minutes = sys_tz.tz_minuteswest; if (minutes < 0) /* east of Greenwich */ pr_info("kernel timezone is +%02d%02d\n", -minutes / 60, -minutes % 60); else /* west of Greenwich */ pr_info("kernel timezone is -%02d%02d\n", minutes / 60, minutes % 60); return xt_register_match(&xt_time_mt_reg); } static void __exit time_mt_exit(void) { xt_unregister_match(&xt_time_mt_reg); } module_init(time_mt_init); module_exit(time_mt_exit); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: time-based matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_time"); MODULE_ALIAS("ip6t_time"); |
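Aside (standalone sanity sketch, not kernel code): the weekday formula in localtime_2() leans on the fact that 1970-01-01 (day-since-epoch 0) was a Thursday, with the -1/+1 adjustment mapping Sunday onto 7. The little check below exercises exactly that arithmetic against dates worked out by hand:

#include <assert.h>
#include <stdio.h>

/* Same mapping as localtime_2() above: Monday = 1 ... Sunday = 7. */
static unsigned int weekday(unsigned int dse)
{
	return (4 + dse - 1) % 7 + 1;
}

int main(void)
{
	assert(weekday(0) == 4);	/* 1970-01-01 was a Thursday */
	assert(weekday(3) == 7);	/* 1970-01-04, Sunday, maps to 7 */
	assert(weekday(4) == 1);	/* 1970-01-05, Monday, wraps to 1 */
	printf("weekday formula OK\n");
	return 0;
}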
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Symmetric key ciphers. * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_SKCIPHER_H #define _CRYPTO_SKCIPHER_H #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> /* Set this bit if the lskcipher operation is a continuation. */ #define CRYPTO_LSKCIPHER_FLAG_CONT 0x00000001 /* Set this bit if the lskcipher operation is final. */ #define CRYPTO_LSKCIPHER_FLAG_FINAL 0x00000002 /* The bit CRYPTO_TFM_REQ_MAY_SLEEP can also be set if needed. */ /* Set this bit if the skcipher operation is a continuation. */ #define CRYPTO_SKCIPHER_REQ_CONT 0x00000001 /* Set this bit if the skcipher operation is not final. */ #define CRYPTO_SKCIPHER_REQ_NOTFINAL 0x00000002 struct scatterlist; /** * struct skcipher_request - Symmetric key cipher request * @cryptlen: Number of bytes to encrypt or decrypt * @iv: Initialisation Vector * @src: Source SG list * @dst: Destination SG list * @base: Underlying async request * @__ctx: Start of private context data */ struct skcipher_request { unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; struct crypto_async_request base; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_skcipher { unsigned int reqsize; struct crypto_tfm base; }; struct crypto_sync_skcipher { struct crypto_skcipher base; }; struct crypto_lskcipher { struct crypto_tfm base; }; /* * struct skcipher_alg_common - common properties of skcipher_alg * @min_keysize: Minimum key size supported by the transformation. This is the * smallest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MIN_KEY_SIZE" include/crypto/ * @max_keysize: Maximum key size supported by the transformation. This is the * largest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MAX_KEY_SIZE" include/crypto/ * @ivsize: IV size applicable for transformation. The consumer must provide an * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. * @statesize: Size of the internal state for the algorithm. * @base: Definition of a generic crypto algorithm. */ #define SKCIPHER_ALG_COMMON { \ unsigned int min_keysize; \ unsigned int max_keysize; \ unsigned int ivsize; \ unsigned int chunksize; \ unsigned int statesize; \ \ struct crypto_alg base; \ } struct skcipher_alg_common SKCIPHER_ALG_COMMON; /** * struct skcipher_alg - symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware.
This function is also responsible for checking the key * length for validity. In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt * the supplied scatterlist containing the blocks of data. The crypto * API consumer is responsible for aligning the entries of the * scatterlist properly and making sure the chunks are correctly * sized. In case a software fallback was put in place in the * @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. In case the * key was stored in transformation context, the key might need to be * re-programmed into the hardware in this function. This function * shall not modify the transformation context, as this function may * be called in parallel with the same transformation object. * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt * and the conditions are exactly the same. * @export: Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save partial result of the transformation after * processing certain amount of data and reload this partial result * multiple times later on for multiple re-use. No data processing * happens at this point. * @import: Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @walksize: Equal to the chunk size except in cases where the algorithm is * considerably more efficient if it can operate on multiple chunks * in parallel. Should be a multiple of chunksize. * @co: see struct skcipher_alg_common * * All fields except @ivsize are mandatory and must be filled. */ struct skcipher_alg { int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct skcipher_request *req); int (*decrypt)(struct skcipher_request *req); int (*export)(struct skcipher_request *req, void *out); int (*import)(struct skcipher_request *req, const void *in); int (*init)(struct crypto_skcipher *tfm); void (*exit)(struct crypto_skcipher *tfm); unsigned int walksize; union { struct SKCIPHER_ALG_COMMON; struct skcipher_alg_common co; }; }; /** * struct lskcipher_alg - linear symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. 
This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware. This function is also responsible for checking the key * length for validity. In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a number of bytes. This function is used to encrypt * the supplied data. This function shall not modify * the transformation context, as this function may be called * in parallel with the same transformation object. Data * may be left over if length is not a multiple of blocks * and there is more to come (final == false). The number of * left-over bytes should be returned in case of success. * The siv field shall be as long as ivsize + statesize with * the IV placed at the front. The state will be used by the * algorithm internally. * @decrypt: Decrypt a number of bytes. This is a reverse counterpart to * @encrypt and the conditions are exactly the same. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @co: see struct skcipher_alg_common */ struct lskcipher_alg { int (*setkey)(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*decrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*init)(struct crypto_lskcipher *tfm); void (*exit)(struct crypto_lskcipher *tfm); struct skcipher_alg_common co; }; #define MAX_SYNC_SKCIPHER_REQSIZE 384 /* * This performs a type-check against the "_tfm" argument to make sure * all users have the correct skcipher tfm for doing on-stack requests. */ #define SYNC_SKCIPHER_REQUEST_ON_STACK(name, _tfm) \ char __##name##_desc[sizeof(struct skcipher_request) + \ MAX_SYNC_SKCIPHER_REQSIZE \ ] CRYPTO_MINALIGN_ATTR; \ struct skcipher_request *name = \ (((struct skcipher_request *)__##name##_desc)->base.tfm = \ crypto_sync_skcipher_tfm((_tfm)), \ (void *)__##name##_desc) /** * DOC: Symmetric Key Cipher API * * Symmetric key cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_SKCIPHER (listed as type "skcipher" in /proc/crypto). * * Asynchronous cipher operations imply that the function invocation for a * cipher request returns immediately before the completion of the operation. * The cipher request is scheduled as a separate kernel thread and therefore * load-balanced on the different CPUs via the process scheduler. To allow * the kernel crypto API to inform the caller about the completion of a cipher * request, the caller must provide a callback function. That function is * invoked with the cipher handle when the request completes. * * To support the asynchronous operation, additional information than just the * cipher handle must be supplied to the kernel crypto API. That additional * information is given by filling in the skcipher_request data structure. * * For the symmetric key cipher API, the state is maintained with the tfm * cipher handle. 
A single tfm can be used across multiple calls and in * parallel. For asynchronous block cipher calls, context data supplied and * only used by the caller can be referenced in the request data structure in * addition to the IV used for the cipher request. Maintaining such * state information is important because, when the callback function is * invoked upon completion of the cipher operation, it may need to know * which operation just finished if multiple operations were invoked in * parallel. This * state information is unused by the kernel crypto API. */ static inline struct crypto_skcipher *__crypto_skcipher_cast( struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_skcipher, base); } /** * crypto_alloc_skcipher() - allocate symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an skcipher. The returned struct * crypto_skcipher is the cipher handle that is required for any subsequent * API invocation for that skcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask); struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask); /** * crypto_alloc_lskcipher() - allocate linear symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * lskcipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an lskcipher. The returned struct * crypto_lskcipher is the cipher handle that is required for any subsequent * API invocation for that lskcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_lskcipher_tfm( struct crypto_lskcipher *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_sync_skcipher_tfm( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_tfm(&tfm->base); } /** * crypto_free_skcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) { crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm)); } static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) { crypto_free_skcipher(&tfm->base); } /** * crypto_free_lskcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_lskcipher(struct crypto_lskcipher *tfm) { crypto_destroy_tfm(tfm, crypto_lskcipher_tfm(tfm)); } /** * crypto_has_skcipher() - Search for the availability of an skcipher.
* @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher * @type: specifies the type of the skcipher * @mask: specifies the mask for the skcipher * * Return: true when the skcipher is known to the kernel crypto API; false * otherwise */ int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask); static inline const char *crypto_skcipher_driver_name( struct crypto_skcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)); } static inline const char *crypto_lskcipher_driver_name( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_lskcipher_tfm(tfm)); } static inline struct skcipher_alg_common *crypto_skcipher_alg_common( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg_common, base); } static inline struct skcipher_alg *crypto_skcipher_alg( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg, base); } static inline struct lskcipher_alg *crypto_lskcipher_alg( struct crypto_lskcipher *tfm) { return container_of(crypto_lskcipher_tfm(tfm)->__crt_alg, struct lskcipher_alg, co.base); } /** * crypto_skcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the skcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->ivsize; } static inline unsigned int crypto_sync_skcipher_ivsize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_ivsize(&tfm->base); } /** * crypto_lskcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the lskcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_lskcipher_ivsize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.ivsize; } /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the skcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_skcipher_blocksize( struct crypto_skcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm)); } /** * crypto_lskcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the lskcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_lskcipher_blocksize( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_lskcipher_tfm(tfm)); } /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. 
* * Return: chunk size in bytes */ static inline unsigned int crypto_skcipher_chunksize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->chunksize; } /** * crypto_lskcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. * * Return: chunk size in bytes */ static inline unsigned int crypto_lskcipher_chunksize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.chunksize; } /** * crypto_skcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_skcipher_statesize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->statesize; } /** * crypto_lskcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_lskcipher_statesize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.statesize; } static inline unsigned int crypto_sync_skcipher_blocksize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_blocksize(&tfm->base); } static inline unsigned int crypto_skcipher_alignmask( struct crypto_skcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)); } static inline unsigned int crypto_lskcipher_alignmask( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_lskcipher_tfm(tfm)); } static inline u32 crypto_skcipher_get_flags(struct crypto_skcipher *tfm) { return crypto_tfm_get_flags(crypto_skcipher_tfm(tfm)); } static inline void crypto_skcipher_set_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_skcipher_tfm(tfm), flags); } static inline void crypto_skcipher_clear_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_skcipher_tfm(tfm), flags); } static inline u32 crypto_sync_skcipher_get_flags( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_get_flags(&tfm->base); } static inline void crypto_sync_skcipher_set_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_set_flags(&tfm->base, flags); } static inline void crypto_sync_skcipher_clear_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_clear_flags(&tfm->base, flags); } static inline u32 crypto_lskcipher_get_flags(struct crypto_lskcipher *tfm) { return crypto_tfm_get_flags(crypto_lskcipher_tfm(tfm)); } static inline void crypto_lskcipher_set_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_lskcipher_tfm(tfm), flags); } static inline void crypto_lskcipher_clear_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_lskcipher_tfm(tfm), flags); } /** * crypto_skcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the skcipher referenced by the cipher * 
handle. * * Note: the key length determines the cipher variant. Many block ciphers come * in different variants depending on the key size, such as AES-128 vs. AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm, const u8 *key, unsigned int keylen) { return crypto_skcipher_setkey(&tfm->base, key, keylen); } /** * crypto_lskcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the lskcipher referenced by the cipher * handle. * * Note: the key length determines the cipher variant. Many block ciphers come * in different variants depending on the key size, such as AES-128 vs. AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); static inline unsigned int crypto_skcipher_min_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->min_keysize; } static inline unsigned int crypto_skcipher_max_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->max_keysize; } static inline unsigned int crypto_lskcipher_min_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.min_keysize; } static inline unsigned int crypto_lskcipher_max_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.max_keysize; } /** * crypto_skcipher_reqtfm() - obtain cipher handle from request * @req: skcipher_request out of which the cipher handle is to be obtained * * Return the crypto_skcipher handle when furnishing an skcipher_request * data structure. * * Return: crypto_skcipher handle */ static inline struct crypto_skcipher *crypto_skcipher_reqtfm( struct skcipher_request *req) { return __crypto_skcipher_cast(req->base.tfm); } static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); return container_of(tfm, struct crypto_sync_skcipher, base); } /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_encrypt(struct skcipher_request *req); /** * crypto_skcipher_decrypt() - decrypt ciphertext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions.
* * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_decrypt(struct skcipher_request *req); /** * crypto_skcipher_export() - export partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @out: output buffer of sufficient size that can hold the state * * Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save the partial result of the transformation after * processing a certain amount of data and reload this partial result * multiple times later on for re-use. No data processing * happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_export(struct skcipher_request *req, void *out); /** * crypto_skcipher_import() - import partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @in: buffer holding the state * * Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_import(struct skcipher_request *req, const void *in); /** * crypto_lskcipher_encrypt() - encrypt plaintext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed by a buffer of the length specified by * crypto_lskcipher_statesize. * * Encrypt plaintext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * crypto_lskcipher_decrypt() - decrypt ciphertext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed by a buffer of the length specified by * crypto_lskcipher_statesize. * * Decrypt ciphertext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * DOC: Symmetric Key Cipher Request Handle * * The skcipher_request data structure contains all pointers to data * required for the symmetric key cipher operation. This includes the cipher * handle (which can be used by multiple skcipher_request instances), pointer * to plaintext and ciphertext, asynchronous callback function, etc. It acts * as a handle to the skcipher_request_* API calls in a similar way as * skcipher handle to the crypto_skcipher_* API calls.
*/ /** * crypto_skcipher_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_skcipher_reqsize(struct crypto_skcipher *tfm) { return tfm->reqsize; } /** * skcipher_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing skcipher handle in the request * data structure with a different one. */ static inline void skcipher_request_set_tfm(struct skcipher_request *req, struct crypto_skcipher *tfm) { req->base.tfm = crypto_skcipher_tfm(tfm); } static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req, struct crypto_sync_skcipher *tfm) { skcipher_request_set_tfm(req, &tfm->base); } static inline struct skcipher_request *skcipher_request_cast( struct crypto_async_request *req) { return container_of(req, struct skcipher_request, base); } /** * skcipher_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the skcipher * encrypt and decrypt API calls. During the allocation, the provided skcipher * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct skcipher_request *skcipher_request_alloc_noprof( struct crypto_skcipher *tfm, gfp_t gfp) { struct skcipher_request *req; req = kmalloc_noprof(sizeof(struct skcipher_request) + crypto_skcipher_reqsize(tfm), gfp); if (likely(req)) skcipher_request_set_tfm(req, tfm); return req; } #define skcipher_request_alloc(...) alloc_hooks(skcipher_request_alloc_noprof(__VA_ARGS__)) /** * skcipher_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void skcipher_request_free(struct skcipher_request *req) { kfree_sensitive(req); } static inline void skcipher_request_zero(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); memzero_explicit(req, sizeof(*req) + crypto_skcipher_reqsize(tfm)); } /** * skcipher_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * This function allows setting the callback function that is triggered once the * cipher operation completes. 
* * The callback function is registered with the skcipher_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void skcipher_request_set_callback(struct skcipher_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * skcipher_request_set_crypt() - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_skcipher_ivsize * * This function allows setting of the source data and destination data * scatter / gather lists. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. */ static inline void skcipher_request_set_crypt( struct skcipher_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, void *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } #endif /* _CRYPTO_SKCIPHER_H */
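/*
 * Editorial example (not part of the original header): a minimal sketch of
 * the synchronous skcipher usage pattern documented above. The algorithm
 * name "cbc(aes)", the in-place buffer, and the caller-supplied key and IV
 * are assumptions for illustration only; @len must be a multiple of the
 * cipher block size for CBC. Error handling is deliberately minimal.
 */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int example_cbc_encrypt(const u8 *key, unsigned int keylen,
			       u8 *iv, u8 *buf, unsigned int len)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int err;

	/* A sync tfm guarantees the request completes before returning. */
	tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_sync_skcipher_setkey(tfm, key, keylen);
	if (!err) {
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		/* No completion callback is needed for a sync tfm. */
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
					      NULL, NULL);
		sg_init_one(&sg, buf, len);
		/* Encrypt in place: source and destination are the same. */
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);
		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}

	crypto_free_sync_skcipher(tfm);
	return err;
}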
// SPDX-License-Identifier: GPL-2.0-or-later /* * Stadia controller rumble support. * * Copyright 2023 Google LLC */ #include <linux/hid.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include "hid-ids.h" #define STADIA_FF_REPORT_ID 5 struct stadiaff_device { struct hid_device *hid; struct hid_report *report; spinlock_t lock; /* protects the rumble state below */ bool removed; uint16_t strong_magnitude; uint16_t weak_magnitude; struct work_struct work; }; /* Deferred rumble output: the ff play callback may run in atomic context, so the actual HID output report is sent from this work item in process context. */ static void stadiaff_work(struct work_struct *work) { struct stadiaff_device *stadiaff = container_of(work, struct stadiaff_device, work); struct hid_field *rumble_field = stadiaff->report->field[0]; unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); rumble_field->value[0] = stadiaff->strong_magnitude; rumble_field->value[1] = stadiaff->weak_magnitude; spin_unlock_irqrestore(&stadiaff->lock, flags); hid_hw_request(stadiaff->hid, stadiaff->report, HID_REQ_SET_REPORT); } static int stadiaff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); if (!stadiaff->removed) { stadiaff->strong_magnitude = effect->u.rumble.strong_magnitude; stadiaff->weak_magnitude = effect->u.rumble.weak_magnitude; schedule_work(&stadiaff->work); } spin_unlock_irqrestore(&stadiaff->lock, flags); return 0; } static int stadiaff_init(struct hid_device *hid) { struct stadiaff_device *stadiaff; struct hid_report *report; struct hid_input *hidinput; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; report = hid_validate_values(hid, HID_OUTPUT_REPORT, STADIA_FF_REPORT_ID, 0, 2); if (!report) return -ENODEV; stadiaff = devm_kzalloc(&hid->dev, sizeof(struct stadiaff_device), GFP_KERNEL); if (!stadiaff) return -ENOMEM; hid_set_drvdata(hid, stadiaff); input_set_capability(dev, EV_FF, FF_RUMBLE); error = input_ff_create_memless(dev, NULL, stadiaff_play); if (error) return error; stadiaff->removed = false; stadiaff->hid = hid; stadiaff->report = report; INIT_WORK(&stadiaff->work, stadiaff_work); spin_lock_init(&stadiaff->lock); hid_info(hid, "Force Feedback for Google Stadia controller\n"); return 0; } static int stadia_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } ret = stadiaff_init(hdev); if (ret) { hid_err(hdev, "force feedback init failed\n"); hid_hw_stop(hdev); return ret; } return 0; } static void stadia_remove(struct hid_device *hid) { struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags);
stadiaff->removed = true; spin_unlock_irqrestore(&stadiaff->lock, flags); cancel_work_sync(&stadiaff->work); hid_hw_stop(hid); } static const struct hid_device_id stadia_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { } }; MODULE_DEVICE_TABLE(hid, stadia_devices); static struct hid_driver stadia_driver = { .name = "stadia", .id_table = stadia_devices, .probe = stadia_probe, .remove = stadia_remove, }; module_hid_driver(stadia_driver); MODULE_DESCRIPTION("Google Stadia controller rumble support."); MODULE_LICENSE("GPL");
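/*
 * Editorial sketch (not part of the driver above): how userspace would
 * exercise stadiaff_play() through the standard input force-feedback
 * interface. The magnitudes, duration, and the already-opened event-device
 * fd are assumptions for illustration only.
 */
#include <linux/input.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>

static int play_rumble(int fd)
{
	struct ff_effect effect;
	struct input_event play;

	memset(&effect, 0, sizeof(effect));
	effect.type = FF_RUMBLE;
	effect.id = -1;				/* ask the kernel to assign a slot */
	effect.u.rumble.strong_magnitude = 0x8000;
	effect.u.rumble.weak_magnitude = 0x4000;
	effect.replay.length = 1000;		/* milliseconds */

	if (ioctl(fd, EVIOCSFF, &effect) < 0)	/* upload the effect */
		return -1;

	memset(&play, 0, sizeof(play));
	play.type = EV_FF;
	play.code = effect.id;			/* slot chosen by the kernel */
	play.value = 1;				/* start playback */
	if (write(fd, &play, sizeof(play)) != sizeof(play))
		return -1;
	return 0;
}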
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * * This file is part of the SCTP kernel implementation * * This file contains sctp stream manipulation primitives and helpers. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Xin Long <lucien.xin@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) { struct sctp_association *asoc; struct sctp_chunk *ch, *temp; struct sctp_outq *outq; asoc = container_of(stream, struct sctp_association, stream); outq = &asoc->outqueue; list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) { __u16 sid = sctp_chunk_stream_no(ch); if (sid < outcnt) continue; sctp_sched_dequeue_common(outq, ch); /* No need to call dequeue_done here because * the chunks are not scheduled by now. */ /* Mark as failed send. */ sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM); if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(ch); } } static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) { const struct sctp_sched_ops *sched; if (!SCTP_SO(stream, sid)->ext) return; sched = sctp_sched_ops_from_stream(stream); sched->free_sid(stream, sid); kfree(SCTP_SO(stream, sid)->ext); SCTP_SO(stream, sid)->ext = NULL; } /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also removes chunks queued to streams * higher than the new max. */ static void sctp_stream_outq_migrate(struct sctp_stream *stream, struct sctp_stream *new, __u16 outcnt) { int i; if (stream->outcnt > outcnt) sctp_stream_shrink_out(stream, outcnt); if (new) { /* Here we actually move the old ext stuff into the new * buffer, because we want to keep it. Then * sctp_stream_update will swap ->out pointers.
*/ for (i = 0; i < outcnt; i++) { sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } for (i = outcnt; i < stream->outcnt; i++) sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, gfp_t gfp) { int ret; if (outcnt <= stream->outcnt) goto out; ret = genradix_prealloc(&stream->out, outcnt, gfp); if (ret) return ret; out: stream->outcnt = outcnt; return 0; } static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, gfp_t gfp) { int ret; if (incnt <= stream->incnt) goto out; ret = genradix_prealloc(&stream->in, incnt, gfp); if (ret) return ret; out: stream->incnt = incnt; return 0; } int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i, ret = 0; gfp |= __GFP_NOWARN; /* Initial stream->out size may be very big, so free it and alloc * a new one with new outcnt to save memory if needed. */ if (outcnt == stream->outcnt) goto handle_in; /* Filter out chunks queued on streams that won't exist anymore */ sched->unsched_all(stream); sctp_stream_outq_migrate(stream, NULL, outcnt); sched->sched_all(stream); ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; handle_in: sctp_stream_interleave_init(stream); if (!incnt) return 0; return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) { struct sctp_stream_out_ext *soute; int ret; soute = kzalloc(sizeof(*soute), GFP_KERNEL); if (!soute) return -ENOMEM; SCTP_SO(stream, sid)->ext = soute; ret = sctp_sched_init_sid(stream, sid, GFP_KERNEL); if (ret) { kfree(SCTP_SO(stream, sid)->ext); SCTP_SO(stream, sid)->ext = NULL; } return ret; } void sctp_stream_free(struct sctp_stream *stream) { const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } void sctp_stream_clear(struct sctp_stream *stream) { int i; for (i = 0; i < stream->outcnt; i++) { SCTP_SO(stream, i)->mid = 0; SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) SCTP_SI(stream, i)->mid = 0; } void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) { const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); sched->unsched_all(stream); sctp_stream_outq_migrate(stream, new, new->outcnt); sctp_stream_free(stream); stream->out = new->out; stream->in = new->in; stream->outcnt = new->outcnt; stream->incnt = new->incnt; sched->sched_all(stream); new->out.tree.root = NULL; new->in.tree.root = NULL; new->outcnt = 0; new->incnt = 0; } static int sctp_send_reconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { int retval = 0; retval = sctp_primitive_RECONF(asoc->base.net, asoc, chunk); if (retval) sctp_chunk_free(chunk); return retval; } static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, __u16 str_nums, __be16 *str_list) { struct sctp_association *asoc; __u16 i; asoc = container_of(stream, struct sctp_association, stream); if (!asoc->outqueue.out_qlen) return true; if (!str_nums) return false; for (i = 0; i < str_nums; i++) { __u16 sid = ntohs(str_list[i]); if (SCTP_SO(stream, sid)->ext && !list_empty(&SCTP_SO(stream, sid)->ext->outq)) return false; } return 
true; } int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params) { struct sctp_stream *stream = &asoc->stream; __u16 i, str_nums, *str_list; struct sctp_chunk *chunk; int retval = -EINVAL; __be16 *nstr_list; bool out, in; if (!asoc->peer.reconf_capable || !(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) { retval = -ENOPROTOOPT; goto out; } if (asoc->strreset_outstanding) { retval = -EINPROGRESS; goto out; } out = params->srs_flags & SCTP_STREAM_RESET_OUTGOING; in = params->srs_flags & SCTP_STREAM_RESET_INCOMING; if (!out && !in) goto out; str_nums = params->srs_number_streams; str_list = params->srs_stream_list; if (str_nums) { int param_len = 0; if (out) { for (i = 0; i < str_nums; i++) if (str_list[i] >= stream->outcnt) goto out; param_len = str_nums * sizeof(__u16) + sizeof(struct sctp_strreset_outreq); } if (in) { for (i = 0; i < str_nums; i++) if (str_list[i] >= stream->incnt) goto out; param_len += str_nums * sizeof(__u16) + sizeof(struct sctp_strreset_inreq); } if (param_len > SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_reconf_chunk)) goto out; } nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL); if (!nstr_list) { retval = -ENOMEM; goto out; } for (i = 0; i < str_nums; i++) nstr_list[i] = htons(str_list[i]); if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { kfree(nstr_list); retval = -EAGAIN; goto out; } chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); kfree(nstr_list); if (!chunk) { retval = -ENOMEM; goto out; } if (out) { if (str_nums) for (i = 0; i < str_nums; i++) SCTP_SO(stream, str_list[i])->state = SCTP_STREAM_CLOSED; else for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; } asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); retval = sctp_send_reconf(asoc, chunk); if (retval) { sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; if (!out) goto out; if (str_nums) for (i = 0; i < str_nums; i++) SCTP_SO(stream, str_list[i])->state = SCTP_STREAM_OPEN; else for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; goto out; } asoc->strreset_outstanding = out + in; out: return retval; } int sctp_send_reset_assoc(struct sctp_association *asoc) { struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; int retval; __u16 i; if (!asoc->peer.reconf_capable || !(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) return -ENOPROTOOPT; if (asoc->strreset_outstanding) return -EINPROGRESS; if (!sctp_outq_is_empty(&asoc->outqueue)) return -EAGAIN; chunk = sctp_make_strreset_tsnreq(asoc); if (!chunk) return -ENOMEM; /* Block further xmit of data until this request is completed */ for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); retval = sctp_send_reconf(asoc, chunk); if (retval) { sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; return retval; } asoc->strreset_outstanding = 1; return 0; } int sctp_send_add_streams(struct sctp_association *asoc, struct sctp_add_streams *params) { struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; int retval; __u32 outcnt, incnt; __u16 out, in; if (!asoc->peer.reconf_capable || !(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { retval = -ENOPROTOOPT; goto out; } if (asoc->strreset_outstanding) { retval = -EINPROGRESS; goto out; 
} out = params->sas_outstrms; in = params->sas_instrms; outcnt = stream->outcnt + out; incnt = stream->incnt + in; if (outcnt > SCTP_MAX_STREAM || incnt > SCTP_MAX_STREAM || (!out && !in)) { retval = -EINVAL; goto out; } if (out) { retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL); if (retval) goto out; } chunk = sctp_make_strreset_addstrm(asoc, out, in); if (!chunk) { retval = -ENOMEM; goto out; } asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); retval = sctp_send_reconf(asoc, chunk); if (retval) { sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; goto out; } asoc->strreset_outstanding = !!out + !!in; out: return retval; } static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( struct sctp_association *asoc, __be32 resp_seq, __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; struct sctp_reconf_chunk *hdr; union sctp_params param; if (!chunk) return NULL; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; sctp_walk_params(param, hdr) { /* sctp_strreset_tsnreq is actually the basic structure * of all stream reconf params, so it's safe to use it * to access request_seq. */ struct sctp_strreset_tsnreq *req = param.v; if ((!resp_seq || req->request_seq == resp_seq) && (!type || type == req->param_hdr.type)) return param.v; } return NULL; } static void sctp_update_strreset_result(struct sctp_association *asoc, __u32 result) { asoc->strreset_result[1] = asoc->strreset_result[0]; asoc->strreset_result[0] = result; } struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; __be16 *str_p = NULL; __u32 request_seq; __u16 i, nums; request_seq = ntohl(outreq->request_seq); if (ntohl(outreq->send_reset_at_tsn) > sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map)) { result = SCTP_STRRESET_IN_PROGRESS; goto err; } if (TSN_lt(asoc->strreset_inseq, request_seq) || TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; goto err; } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; goto err; } asoc->strreset_inseq++; /* Check strreset_enable after inseq inc, as sender cannot tell * the peer doesn't enable strreset after receiving response with * result denied, as well as to keep consistent with bsd. 
*/ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) goto out; nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); str_p = outreq->list_of_streams; for (i = 0; i < nums; i++) { if (ntohs(str_p[i]) >= stream->incnt) { result = SCTP_STRRESET_ERR_WRONG_SSN; goto out; } } if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, outreq->response_seq, SCTP_PARAM_RESET_IN_REQUEST)) { /* same process with outstanding isn't 0 */ result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; } asoc->strreset_outstanding--; asoc->strreset_outseq++; if (!asoc->strreset_outstanding) { struct sctp_transport *t; t = asoc->strreset_chunk->transport; if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; } } if (nums) for (i = 0; i < nums; i++) SCTP_SI(stream, ntohs(str_p[i]))->mid = 0; else for (i = 0; i < stream->incnt; i++) SCTP_SI(stream, i)->mid = 0; result = SCTP_STRRESET_PERFORMED; *evp = sctp_ulpevent_make_stream_reset_event(asoc, SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); out: sctp_update_strreset_result(asoc, result); err: return sctp_make_strreset_resp(asoc, result, request_seq); } struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { struct sctp_strreset_inreq *inreq = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; __u32 request_seq; __u16 i, nums; __be16 *str_p; request_seq = ntohl(inreq->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; goto err; } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) return NULL; goto err; } asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) goto out; if (asoc->strreset_outstanding) { result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; } nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16); str_p = inreq->list_of_streams; for (i = 0; i < nums; i++) { if (ntohs(str_p[i]) >= stream->outcnt) { result = SCTP_STRRESET_ERR_WRONG_SSN; goto out; } } if (!sctp_stream_outq_is_empty(stream, nums, str_p)) { result = SCTP_STRRESET_IN_PROGRESS; asoc->strreset_inseq--; goto err; } chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); if (!chunk) goto out; if (nums) for (i = 0; i < nums; i++) SCTP_SO(stream, ntohs(str_p[i]))->state = SCTP_STREAM_CLOSED; else for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; asoc->strreset_chunk = chunk; asoc->strreset_outstanding = 1; sctp_chunk_hold(asoc->strreset_chunk); result = SCTP_STRRESET_PERFORMED; out: sctp_update_strreset_result(asoc, result); err: if (!chunk) chunk = sctp_make_strreset_resp(asoc, result, request_seq); return chunk; } struct sctp_chunk *sctp_process_strreset_tsnreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen; struct sctp_strreset_tsnreq *tsnreq = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; __u32 request_seq; __u16 i; request_seq = ntohl(tsnreq->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; goto err; } else if 
(TSN_lt(request_seq, asoc->strreset_inseq)) { i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) { next_tsn = asoc->ctsn_ack_point + 1; init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; } goto err; } if (!sctp_outq_is_empty(&asoc->outqueue)) { result = SCTP_STRRESET_IN_PROGRESS; goto err; } asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) goto out; if (asoc->strreset_outstanding) { result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; } /* G4: The same processing as though a FWD-TSN chunk (as defined in * [RFC3758]) with all streams affected and a new cumulative TSN * ACK of the Receiver's Next TSN minus 1 were received MUST be * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); asoc->stream.si->report_ftsn(&asoc->ulpq, max_tsn_seen); /* G1: Compute an appropriate value for the Receiver's Next TSN -- the * TSN that the peer should use to send the next DATA chunk. The * value SHOULD be the smallest TSN not acknowledged by the * receiver of the request plus 2^31. */ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); /* G3: The same processing as though a SACK chunk with no gap report * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were * received MUST be performed. */ sctp_outq_free(&asoc->outqueue); /* G2: Compute an appropriate value for the local endpoint's next TSN, * i.e., the next TSN assigned by the receiver of the SSN/TSN reset * chunk. The value SHOULD be the highest TSN sent by the receiver * of the request plus 1. */ next_tsn = asoc->next_tsn; asoc->ctsn_ack_point = next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all * incoming and outgoing streams. 
*/ for (i = 0; i < stream->outcnt; i++) { SCTP_SO(stream, i)->mid = 0; SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) SCTP_SI(stream, i)->mid = 0; result = SCTP_STRRESET_PERFORMED; *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn, next_tsn, GFP_ATOMIC); out: sctp_update_strreset_result(asoc, result); err: return sctp_make_strreset_tsnresp(asoc, result, request_seq, next_tsn, init_tsn); } struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; __u32 request_seq, incnt; __u16 in, i; request_seq = ntohl(addstrm->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; goto err; } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; goto err; } asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) goto out; in = ntohs(addstrm->number_of_streams); incnt = stream->incnt + in; if (!in || incnt > SCTP_MAX_STREAM) goto out; if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) goto out; if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { /* same process with outstanding isn't 0 */ result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; } asoc->strreset_outstanding--; asoc->strreset_outseq++; if (!asoc->strreset_outstanding) { struct sctp_transport *t; t = asoc->strreset_chunk->transport; if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; } } stream->incnt = incnt; result = SCTP_STRRESET_PERFORMED; *evp = sctp_ulpevent_make_stream_change_event(asoc, 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC); out: sctp_update_strreset_result(asoc, result); err: return sctp_make_strreset_resp(asoc, result, request_seq); } struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; __u32 request_seq, outcnt; __u16 out, i; int ret; request_seq = ntohl(addstrm->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; goto err; } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) return NULL; goto err; } asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) goto out; if (asoc->strreset_outstanding) { result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; } out = ntohs(addstrm->number_of_streams); outcnt = stream->outcnt + out; if (!out || outcnt > SCTP_MAX_STREAM) goto out; ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC); if (ret) goto out; chunk = sctp_make_strreset_addstrm(asoc, out, 0); if (!chunk) goto out; asoc->strreset_chunk = chunk; asoc->strreset_outstanding = 1; sctp_chunk_hold(asoc->strreset_chunk); stream->outcnt = outcnt; result = SCTP_STRRESET_PERFORMED; out: sctp_update_strreset_result(asoc, result); err: if (!chunk) chunk = 
sctp_make_strreset_resp(asoc, result, request_seq); return chunk; } struct sctp_chunk *sctp_process_strreset_resp( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp) { struct sctp_stream *stream = &asoc->stream; struct sctp_strreset_resp *resp = param.v; struct sctp_transport *t; __u16 i, nums, flags = 0; struct sctp_paramhdr *req; __u32 result; req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0); if (!req) return NULL; result = ntohl(resp->result); if (result != SCTP_STRRESET_PERFORMED) { /* if in progress, do nothing but retransmit */ if (result == SCTP_STRRESET_IN_PROGRESS) return NULL; else if (result == SCTP_STRRESET_DENIED) flags = SCTP_STREAM_RESET_DENIED; else flags = SCTP_STREAM_RESET_FAILED; } if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { struct sctp_strreset_outreq *outreq; __be16 *str_p; outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / sizeof(__u16); if (result == SCTP_STRRESET_PERFORMED) { struct sctp_stream_out *sout; if (nums) { for (i = 0; i < nums; i++) { sout = SCTP_SO(stream, ntohs(str_p[i])); sout->mid = 0; sout->mid_uo = 0; } } else { for (i = 0; i < stream->outcnt; i++) { sout = SCTP_SO(stream, i); sout->mid = 0; sout->mid_uo = 0; } } } flags |= SCTP_STREAM_RESET_OUTGOING_SSN; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { struct sctp_strreset_inreq *inreq; __be16 *str_p; /* if the result is performed, it's impossible for inreq */ if (result == SCTP_STRRESET_PERFORMED) return NULL; inreq = (struct sctp_strreset_inreq *)req; str_p = inreq->list_of_streams; nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / sizeof(__u16); flags |= SCTP_STREAM_RESET_INCOMING_SSN; *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { struct sctp_strreset_resptsn *resptsn; __u32 stsn, rtsn; /* check for resptsn, as sctp_verify_reconf didn't do it*/ if (ntohs(param.p->length) != sizeof(*resptsn)) return NULL; resptsn = (struct sctp_strreset_resptsn *)resp; stsn = ntohl(resptsn->senders_next_tsn); rtsn = ntohl(resptsn->receivers_next_tsn); if (result == SCTP_STRRESET_PERFORMED) { __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( &asoc->peer.tsn_map); LIST_HEAD(temp); asoc->stream.si->report_ftsn(&asoc->ulpq, mtsn); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, stsn, GFP_ATOMIC); /* Clean up sacked and abandoned queues only. As the * out_chunk_list may not be empty, splice it to temp, * then get it back after sctp_outq_free is done. 
*/ list_splice_init(&asoc->outqueue.out_chunk_list, &temp); sctp_outq_free(&asoc->outqueue); list_splice_init(&temp, &asoc->outqueue.out_chunk_list); asoc->next_tsn = rtsn; asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; for (i = 0; i < stream->outcnt; i++) { SCTP_SO(stream, i)->mid = 0; SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) SCTP_SI(stream, i)->mid = 0; } for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags, stsn, rtsn, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) { struct sctp_strreset_addstrm *addstrm; __u16 number; addstrm = (struct sctp_strreset_addstrm *)req; nums = ntohs(addstrm->number_of_streams); number = stream->outcnt - nums; if (result == SCTP_STRRESET_PERFORMED) { for (i = number; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; } else { sctp_stream_shrink_out(stream, number); stream->outcnt = number; } *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, 0, nums, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) { struct sctp_strreset_addstrm *addstrm; /* if the result is performed, it's impossible for addstrm in * request. */ if (result == SCTP_STRRESET_PERFORMED) return NULL; addstrm = (struct sctp_strreset_addstrm *)req; nums = ntohs(addstrm->number_of_streams); *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, nums, 0, GFP_ATOMIC); } asoc->strreset_outstanding--; asoc->strreset_outseq++; /* remove everything for this reconf request */ if (!asoc->strreset_outstanding) { t = asoc->strreset_chunk->transport; if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; } return NULL; }
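/*
 * Editorial sketch (not part of the file above): sctp_send_reset_streams()
 * is reached from userspace via the SCTP_RESET_STREAMS socket option. The
 * stream ids, the flag choice, and an established association id passed by
 * the caller are assumptions for illustration only.
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>

static int reset_outgoing_streams(int fd, sctp_assoc_t assoc_id)
{
	struct sctp_reset_streams *srs;
	socklen_t len = sizeof(*srs) + 2 * sizeof(uint16_t);
	int err;

	srs = calloc(1, len);
	if (!srs)
		return -1;
	srs->srs_assoc_id = assoc_id;
	srs->srs_flags = SCTP_STREAM_RESET_OUTGOING;	/* matches 'out' above */
	srs->srs_number_streams = 2;
	srs->srs_stream_list[0] = 0;			/* stream ids to reset */
	srs->srs_stream_list[1] = 1;

	err = setsockopt(fd, IPPROTO_SCTP, SCTP_RESET_STREAMS, srs, len);
	free(srs);
	return err;
}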
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2002 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * This abstraction represents an SCTP endpoint. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@austin.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Dajiang Zhang <dajiang.zhang@nokia.com> */ #include <linux/types.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/random.h> /* get_random_bytes() */ #include <net/sock.h> #include <net/ipv6.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ static void sctp_endpoint_bh_rcv(struct work_struct *work); static void gen_cookie_auth_key(struct hmac_sha256_key *key) { u8 raw_key[SCTP_COOKIE_KEY_SIZE]; get_random_bytes(raw_key, sizeof(raw_key)); hmac_sha256_preparekey(key, raw_key, sizeof(raw_key)); memzero_explicit(raw_key, sizeof(raw_key)); } /* * Initialize the base fields of the endpoint structure.
*/ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { struct net *net = sock_net(sk); struct sctp_shared_key *null_key; ep->asconf_enable = net->sctp.addip_enable; ep->auth_enable = net->sctp.auth_enable; if (ep->auth_enable) { if (sctp_auth_init(ep, gfp)) goto nomem; if (ep->asconf_enable) { sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); } } /* Initialize the base structure. */ /* What type of endpoint are we? */ ep->base.type = SCTP_EP_TYPE_SOCKET; /* Initialize the basic object fields. */ refcount_set(&ep->base.refcnt, 1); ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); /* Set its top-half handler */ sctp_inq_set_th_handler(&ep->base.inqueue, sctp_endpoint_bh_rcv); /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Generate the cookie authentication key. */ gen_cookie_auth_key(&ep->cookie_auth_key); /* SCTP-AUTH extensions */ INIT_LIST_HEAD(&ep->endpoint_shared_keys); null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem_shkey; /* Add the null key to the endpoint shared keys list and * set the hmacs and chunks pointers. */ list_add(&null_key->key_list, &ep->endpoint_shared_keys); ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; ep->ecn_enable = net->sctp.ecn_enable; /* Remember who we are attached to. */ ep->base.sk = sk; ep->base.net = sock_net(sk); sock_hold(ep->base.sk); return ep; nomem_shkey: sctp_auth_free(ep); nomem: return NULL; } /* Create an sctp_endpoint with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) { struct sctp_endpoint *ep; /* Build a local endpoint. */ ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; SCTP_DBG_OBJCNT_INC(ep); return ep; fail_init: kfree(ep); fail: return NULL; } /* Add an association to an endpoint. */ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, struct sctp_association *asoc) { struct sock *sk = ep->base.sk; /* If this is a temporary association, don't bother * since we'll be removing it shortly and don't * want anyone to find it anyway. */ if (asoc->temp) return; /* Now just add it to our list of asocs */ list_add_tail(&asoc->asocs, &ep->asocs); /* Increment the backlog value for a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) sk_acceptq_added(sk); } /* Free the endpoint structure. Delay cleanup until * all users have released their reference count on this structure. */ void sctp_endpoint_free(struct sctp_endpoint *ep) { ep->base.dead = true; inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED); /* Unlink this endpoint, so we can't find it again! */ sctp_unhash_endpoint(ep); sctp_endpoint_put(ep); } /* Final destructor for endpoint.
*/ static void sctp_endpoint_destroy_rcu(struct rcu_head *head) { struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); struct sock *sk = ep->base.sk; sctp_sk(sk)->ep = NULL; sock_put(sk); kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); } static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; if (unlikely(!ep->base.dead)) { WARN(1, "Attempt to destroy undead endpoint %p!\n", ep); return; } /* SCTP-AUTH: Free up AUTH related data such as the shared keys, * chunks and hmacs arrays that were allocated. */ sctp_auth_destroy_keys(&ep->endpoint_shared_keys); sctp_auth_free(ep); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); memzero_explicit(&ep->cookie_auth_key, sizeof(ep->cookie_auth_key)); sk = ep->base.sk; /* Remove and free the port */ if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ int sctp_endpoint_hold(struct sctp_endpoint *ep) { return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are * no more references. */ void sctp_endpoint_put(struct sctp_endpoint *ep) { if (refcount_dec_and_test(&ep->base.refcnt)) sctp_endpoint_destroy(ep); } /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct net *net, const union sctp_addr *laddr, int dif, int sdif) { int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_endpoint *retval = NULL; if (net_eq(ep->base.net, net) && sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && (htons(ep->base.bind_addr.port) == laddr->v4.sin_port)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; } return retval; } /* Find the association that goes with this chunk. * We look up the transport in the hashtable first, then get the * association through t->asoc. */ struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, const union sctp_addr *paddr, struct sctp_transport **transport) { struct sctp_association *asoc = NULL; struct sctp_transport *t; *transport = NULL; /* If the local port is not set, there can't be any associations * on this endpoint. */ if (!ep->base.bind_addr.port) return NULL; rcu_read_lock(); t = sctp_epaddr_lookup_transport(ep, paddr); if (!t) goto out; *transport = t; asoc = t->asoc; out: rcu_read_unlock(); return asoc; } /* Look for any peeled off association from the endpoint that matches the * given peer address. */ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_sockaddr_entry *addr; struct net *net = ep->base.net; struct sctp_bind_addr *bp; bp = &ep->base.bind_addr; /* This function is called with the socket lock held, * so the address_list cannot change. */ list_for_each_entry(addr, &bp->address_list, list) { if (sctp_has_association(net, &addr->a, paddr, bound_dev_if, bound_dev_if)) return true; } return false; } /* Do delayed input processing. This is scheduled by sctp_rcv(). * This may be called on BH or task time.
*/ static void sctp_endpoint_bh_rcv(struct work_struct *work) { struct sctp_endpoint *ep = container_of(work, struct sctp_endpoint, base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; struct net *net; struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; union sctp_subtype subtype; enum sctp_state state; int error = 0; int first_time = 1; /* is this the first time through the loop */ if (ep->base.dead) return; asoc = NULL; inqueue = &ep->base.inqueue; sk = ep->base.sk; net = sock_net(sk); while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); /* If the first chunk in the packet is AUTH, do special * processing specified in Section 6.3 of SCTP-AUTH spec */ if (first_time && (subtype.chunk == SCTP_CID_AUTH)) { struct sctp_chunkhdr *next_hdr; next_hdr = sctp_inq_peek(inqueue); if (!next_hdr) goto normal; /* If the next chunk is COOKIE-ECHO, skip the AUTH * chunk while saving a pointer to it so we can do * Authentication later (during cookie-echo * processing). */ if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { chunk->auth_chunk = skb_clone(chunk->skb, GFP_ATOMIC); chunk->auth = 1; continue; } } normal: /* We might have grown an association since last we * looked, so try again. * * This happens when we've just processed our * COOKIE-ECHO chunk. */ if (NULL == chunk->asoc) { asoc = sctp_endpoint_lookup_assoc(ep, sctp_source(chunk), &transport); chunk->asoc = asoc; chunk->transport = transport; } state = asoc ? asoc->state : SCTP_STATE_CLOSED; if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) continue; /* Remember where the last DATA chunk came from so we * know where to send the SACK. */ if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else { SCTP_INC_STATS(ep->base.net, SCTP_MIB_INCTRLCHUNKS); if (asoc) asoc->stats.ictrlchunks++; } if (chunk->transport) chunk->transport->last_time_heard = ktime_get(); error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); if (error && chunk) chunk->pdiscard = 1; /* Check to see if the endpoint is freed in response to * the incoming chunk. If so, get out of the while loop. */ if (!sctp_sk(sk)->ep) break; if (first_time) first_time = 0; } }
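Since sctp_endpoint_hold() is built on refcount_inc_not_zero(), it fails once the count has already dropped to zero, so lookup-style callers must check its return value before touching the endpoint. A minimal sketch of that discipline (the function below is hypothetical, not part of this file):

/* Illustrative caller of the hold/put pair defined above. */
static void example_inspect_endpoint(struct sctp_endpoint *ep)
{
	if (!sctp_endpoint_hold(ep))
		return;		/* lost the race: ep is already being destroyed */

	/* ... safe to dereference ep here ... */

	sctp_endpoint_put(ep);	/* the final put runs sctp_endpoint_destroy() */
}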
// SPDX-License-Identifier: GPL-2.0-or-later /* * Sysfs attributes of bridge * Linux ethernet bridge * * Authors: * Stephen Hemminger <shemminger@osdl.org> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/sched/signal.h> #include "br_private.h" /* IMPORTANT: new bridge options must be added with netlink support only * please do not add new sysfs entries */ #define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* * Common code for storing bridge parameters. */ static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, int (*set)(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack)) { struct net_bridge *br = to_bridge(d); struct netlink_ext_ack extack = {0}; unsigned long val; int err; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &val); if (err != 0) return err; if (!rtnl_trylock()) return restart_syscall(); err = (*set)(br, val, &extack); if (!err) netdev_state_change(br->dev); if (extack._msg) { if (err) br_err(br, "%s\n", extack._msg); else br_warn(br, "%s\n", extack._msg); } rtnl_unlock(); return err ?
err : len; } static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } static int set_forward_delay(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_forward_delay(br, val); } static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_forward_delay); } static DEVICE_ATTR_RW(forward_delay); static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } static int set_hello_time(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_hello_time(br, val); } static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_hello_time); } static DEVICE_ATTR_RW(hello_time); static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } static int set_max_age(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_max_age(br, val); } static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_max_age); } static DEVICE_ATTR_RW(max_age); static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } static int set_ageing_time(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } static DEVICE_ATTR_RW(ageing_time); static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->stp_enabled); } static int set_stp_state(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_stp_set_enabled(br, val, extack); } static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_stp_state); } static DEVICE_ATTR_RW(stp_state); static ssize_t group_fwd_mask_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } static int set_group_fwd_mask(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { if (val & BR_GROUPFWD_RESTRICTED) return -EINVAL; br->group_fwd_mask = val; return 0; } static ssize_t group_fwd_mask_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_group_fwd_mask); } static DEVICE_ATTR_RW(group_fwd_mask); static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } static int set_priority(struct net_bridge *br, unsigned long val, struct 
netlink_ext_ack *extack) { br_stp_set_bridge_priority(br, (u16) val); return 0; } static ssize_t priority_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } static DEVICE_ATTR_RW(priority); static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } static DEVICE_ATTR_RO(root_id); static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } static DEVICE_ATTR_RO(bridge_id); static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } static DEVICE_ATTR_RO(root_port); static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } static DEVICE_ATTR_RO(root_path_cost); static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } static DEVICE_ATTR_RO(topology_change); static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } static DEVICE_ATTR_RO(topology_change_detected); static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } static DEVICE_ATTR_RO(hello_timer); static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } static DEVICE_ATTR_RO(tcn_timer); static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } static DEVICE_ATTR_RO(topology_change_timer); static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); } static DEVICE_ATTR_RO(gc_timer); static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%pM\n", br->group_addr); } static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct net_bridge *br = to_bridge(d); u8 new_addr[6]; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!mac_pton(buf, new_addr)) return -EINVAL; if (!is_link_local_ether_addr(new_addr)) return -EINVAL; if (new_addr[5] == 1 || /* 802.3x Pause address */ new_addr[5] == 2 || /* 802.3ad Slow protocols */ new_addr[5] == 3) /* 802.1X PAE address */ return -EINVAL; if (!rtnl_trylock()) return restart_syscall(); spin_lock_bh(&br->lock); ether_addr_copy(br->group_addr, new_addr); spin_unlock_bh(&br->lock); br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true); br_recalculate_fwd_mask(br); netdev_state_change(br->dev); rtnl_unlock(); return len; } static DEVICE_ATTR_RW(group_addr); static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { struct 
net_bridge_fdb_flush_desc desc = { .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); return 0; } static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_flush); } static DEVICE_ATTR_WO(flush); static ssize_t no_linklocal_learn_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN)); } static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, extack); } static ssize_t no_linklocal_learn_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_no_linklocal_learn); } static DEVICE_ATTR_RW(no_linklocal_learn); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router); } static int set_multicast_router(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_router(&br->multicast_ctx, val); } static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_router); } static DEVICE_ATTR_RW(multicast_router); static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED)); } static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } static DEVICE_ATTR_RW(multicast_snooping); static ssize_t multicast_query_use_ifaddr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)); } static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val); return 0; } static ssize_t multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } static DEVICE_ATTR_RW(multicast_query_use_ifaddr); static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier); } static int set_multicast_querier(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_querier(&br->multicast_ctx, val); } static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_querier); } static DEVICE_ATTR_RW(multicast_querier); static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%u\n", RHT_ELASTICITY); } static int set_elasticity(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { /* 16 is RHT_ELASTICITY */ NL_SET_ERR_MSG_MOD(extack, "the 
hash_elasticity option has been deprecated and is always 16"); return 0; } static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_elasticity); } static DEVICE_ATTR_RW(hash_elasticity); static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } static int set_hash_max(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->hash_max = val; return 0; } static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_hash_max); } static DEVICE_ATTR_RW(hash_max); static ssize_t multicast_igmp_version_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version); } static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_igmp_version(&br->multicast_ctx, val); } static ssize_t multicast_igmp_version_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_igmp_version); } static DEVICE_ATTR_RW(multicast_igmp_version); static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count); } static int set_last_member_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_last_member_count = val; return 0; } static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } static DEVICE_ATTR_RW(multicast_last_member_count); static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count); } static int set_startup_query_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_startup_query_count = val; return 0; } static ssize_t multicast_startup_query_count_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } static DEVICE_ATTR_RW(multicast_startup_query_count); static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval)); } static int set_last_member_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } static DEVICE_ATTR_RW(multicast_last_member_interval); static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) 
{ struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval)); } static int set_membership_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } static DEVICE_ATTR_RW(multicast_membership_interval); static ssize_t multicast_querier_interval_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval)); } static int set_querier_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } static DEVICE_ATTR_RW(multicast_querier_interval); static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval)); } static int set_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } static DEVICE_ATTR_RW(multicast_query_interval); static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval)); } static int set_query_response_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } static DEVICE_ATTR_RW(multicast_query_response_interval); static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval)); } static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } static DEVICE_ATTR_RW(multicast_startup_query_interval); static ssize_t multicast_stats_enabled_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, 
BROPT_MULTICAST_STATS_ENABLED)); } static int set_stats_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val); return 0; } static ssize_t multicast_stats_enabled_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_stats_enabled); } static DEVICE_ATTR_RW(multicast_stats_enabled); #if IS_ENABLED(CONFIG_IPV6) static ssize_t multicast_mld_version_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version); } static int set_multicast_mld_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_mld_version(&br->multicast_ctx, val); } static ssize_t multicast_mld_version_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_mld_version); } static DEVICE_ATTR_RW(multicast_mld_version); #endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES)); } static int set_nf_call_iptables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val); return 0; } static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } static DEVICE_ATTR_RW(nf_call_iptables); static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES)); } static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val); return 0; } static ssize_t nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } static DEVICE_ATTR_RW(nf_call_ip6tables); static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES)); } static int set_nf_call_arptables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val); return 0; } static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_ENABLED)); } static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } static DEVICE_ATTR_RW(vlan_filtering); static ssize_t vlan_protocol_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br 
= to_bridge(d); return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); } static ssize_t vlan_protocol_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_set_proto); } static DEVICE_ATTR_RW(vlan_protocol); static ssize_t default_pvid_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->default_pvid); } static ssize_t default_pvid_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); } static DEVICE_ATTR_RW(default_pvid); static ssize_t vlan_stats_enabled_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED)); } static int set_vlan_stats_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_vlan_set_stats(br, val); } static ssize_t vlan_stats_enabled_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_vlan_stats_enabled); } static DEVICE_ATTR_RW(vlan_stats_enabled); static ssize_t vlan_stats_per_port_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)); } static int set_vlan_stats_per_port(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_vlan_set_stats_per_port(br, val); } static ssize_t vlan_stats_per_port_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_vlan_stats_per_port); } static DEVICE_ATTR_RW(vlan_stats_per_port); #endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, &dev_attr_hello_time.attr, &dev_attr_max_age.attr, &dev_attr_ageing_time.attr, &dev_attr_stp_state.attr, &dev_attr_group_fwd_mask.attr, &dev_attr_priority.attr, &dev_attr_bridge_id.attr, &dev_attr_root_id.attr, &dev_attr_root_path_cost.attr, &dev_attr_root_port.attr, &dev_attr_topology_change.attr, &dev_attr_topology_change_detected.attr, &dev_attr_hello_timer.attr, &dev_attr_tcn_timer.attr, &dev_attr_topology_change_timer.attr, &dev_attr_gc_timer.attr, &dev_attr_group_addr.attr, &dev_attr_flush.attr, &dev_attr_no_linklocal_learn.attr, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, &dev_attr_multicast_querier.attr, &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, &dev_attr_multicast_startup_query_count.attr, &dev_attr_multicast_last_member_interval.attr, &dev_attr_multicast_membership_interval.attr, &dev_attr_multicast_querier_interval.attr, &dev_attr_multicast_query_interval.attr, &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, &dev_attr_multicast_stats_enabled.attr, &dev_attr_multicast_igmp_version.attr, #if IS_ENABLED(CONFIG_IPV6) &dev_attr_multicast_mld_version.attr, #endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, &dev_attr_default_pvid.attr, 
&dev_attr_vlan_stats_enabled.attr, &dev_attr_vlan_stats_per_port.attr, #endif NULL }; static const struct attribute_group bridge_group = { .name = SYSFS_BRIDGE_ATTR, .attrs = bridge_attrs, }; /* * Export the forwarding information table as a binary file. * The records are struct __fdb_entry. * * Returns the number of bytes read. */ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct net_bridge *br = to_bridge(dev); int n; /* must read whole records */ if (off % sizeof(struct __fdb_entry) != 0) return -EINVAL; n = br_fdb_fillbuf(br, buf, count / sizeof(struct __fdb_entry), off / sizeof(struct __fdb_entry)); if (n > 0) n *= sizeof(struct __fdb_entry); return n; } static const struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, .mode = 0444, }, .read = brforward_read, }; /* * Add entries in sysfs onto the existing network class device * for the bridge. * Adds an attribute group "bridge" containing tuning parameters, * a binary attribute containing the forwarding table, and * a subdirectory to hold links to interfaces. * * Note: the ifobj exists only to be a subdirectory * to hold links. The ifobj exists in the same data structure * as its parent, the bridge, so reference counting works. */ int br_sysfs_addbr(struct net_device *dev) { struct kobject *brobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); int err; err = sysfs_create_group(brobj, &bridge_group); if (err) { pr_info("%s: can't create group %s/%s\n", __func__, dev->name, bridge_group.name); goto out1; } err = sysfs_create_bin_file(brobj, &bridge_forward); if (err) { pr_info("%s: can't create attribute file %s/%s\n", __func__, dev->name, bridge_forward.attr.name); goto out2; } br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); err = -ENOMEM; goto out3; } return 0; out3: sysfs_remove_bin_file(&dev->dev.kobj, &bridge_forward); out2: sysfs_remove_group(&dev->dev.kobj, &bridge_group); out1: return err; } void br_sysfs_delbr(struct net_device *dev) { struct kobject *kobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); kobject_put(br->ifobj); sysfs_remove_bin_file(kobj, &bridge_forward); sysfs_remove_group(kobj, &bridge_group); }
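For reference, the binary file exported above can be consumed from userspace by reading whole struct __fdb_entry records (the UAPI layout comes from <linux/if_bridge.h>). A minimal sketch, assuming a bridge named br0; the path and error handling are illustrative only:

#include <stdio.h>
#include <linux/if_bridge.h>	/* struct __fdb_entry */

int main(void)
{
	struct __fdb_entry fe;
	FILE *f = fopen("/sys/class/net/br0/brforward", "rb");

	if (!f)
		return 1;
	/* brforward_read() above rejects offsets that are not on a
	 * record boundary, so always read sizeof(fe) at a time. */
	while (fread(&fe, sizeof(fe), 1, f) == 1)
		printf("%02x:%02x:%02x:%02x:%02x:%02x port %u%s\n",
		       fe.mac_addr[0], fe.mac_addr[1], fe.mac_addr[2],
		       fe.mac_addr[3], fe.mac_addr[4], fe.mac_addr[5],
		       (unsigned int)fe.port_no,
		       fe.is_local ? " (local)" : "");
	fclose(f);
	return 0;
}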
// SPDX-License-Identifier: GPL-2.0 /* * Released under the GPLv2 only. */ #include <linux/module.h> #include <linux/string.h> #include <linux/bitops.h> #include <linux/slab.h> #include <linux/log2.h> #include <linux/kmsan.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/usb/hcd.h> #include <linux/scatterlist.h> #define to_urb(d) container_of(d, struct urb, kref) static void urb_destroy(struct kref *kref) { struct urb *urb = to_urb(kref); if (urb->transfer_flags & URB_FREE_BUFFER) kfree(urb->transfer_buffer); kfree(urb); } /** * usb_init_urb - initializes a urb so that it can be used by a USB driver * @urb: pointer to the urb to initialize * * Initializes a urb so that the USB subsystem can use it properly. * * If a urb is created with a call to usb_alloc_urb() it is not * necessary to call this function. Only use this if you allocate the * space for a struct urb on your own. If you call this function, be * careful when freeing the memory for your urb that it is no longer in * use by the USB core. * * Only use this function if you _really_ understand what you are doing. */ void usb_init_urb(struct urb *urb) { if (urb) { memset(urb, 0, sizeof(*urb)); kref_init(&urb->kref); INIT_LIST_HEAD(&urb->urb_list); INIT_LIST_HEAD(&urb->anchor_list); } } EXPORT_SYMBOL_GPL(usb_init_urb); /** * usb_alloc_urb - creates a new urb for a USB driver to use * @iso_packets: number of iso packets for this urb * @mem_flags: the type of memory to allocate, see kmalloc() for a list of * valid options for this. * * Creates an urb for the USB driver to use, initializes a few internal * structures, increments the usage counter, and returns a pointer to it. * * If the driver wants to use this urb for interrupt, control, or bulk * endpoints, pass '0' as the number of iso packets. * * The driver must call usb_free_urb() when it is finished with the urb. * * Return: A pointer to the new urb, or %NULL if no memory is available. */ struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags) { struct urb *urb; urb = kmalloc(struct_size(urb, iso_frame_desc, iso_packets), mem_flags); if (!urb) return NULL; usb_init_urb(urb); return urb; } EXPORT_SYMBOL_GPL(usb_alloc_urb); /** * usb_free_urb - frees the memory used by a urb when all users of it are finished * @urb: pointer to the urb to free, may be NULL * * Must be called when a user of a urb is finished with it. When the last user * of the urb calls this function, the memory of the urb is freed. * * Note: The transfer buffer associated with the urb is not freed unless the * URB_FREE_BUFFER transfer flag is set.
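 *
 * For example (an illustrative, hypothetical snippet), a driver can hand
 * buffer ownership to the URB so that the final reference drop frees both;
 * urb_destroy() above is what performs the kfree():
 *
 *	urb->transfer_buffer = kmalloc(len, GFP_KERNEL);
 *	urb->transfer_flags |= URB_FREE_BUFFER;
 *	usb_free_urb(urb);	(last reference: frees buffer and urb together)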
*/ void usb_free_urb(struct urb *urb) { if (urb) kref_put(&urb->kref, urb_destroy); } EXPORT_SYMBOL_GPL(usb_free_urb); /** * usb_get_urb - increments the reference count of the urb * @urb: pointer to the urb to modify, may be NULL * * This must be called whenever a urb is transferred from a device driver to a * host controller driver. This allows proper reference counting to happen * for urbs. * * Return: A pointer to the urb with the incremented reference counter. */ struct urb *usb_get_urb(struct urb *urb) { if (urb) kref_get(&urb->kref); return urb; } EXPORT_SYMBOL_GPL(usb_get_urb); /** * usb_anchor_urb - anchors an URB while it is processed * @urb: pointer to the urb to anchor * @anchor: pointer to the anchor * * This can be called to have access to URBs which are to be executed * without bothering to track them. */ void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor) { unsigned long flags; spin_lock_irqsave(&anchor->lock, flags); usb_get_urb(urb); list_add_tail(&urb->anchor_list, &anchor->urb_list); urb->anchor = anchor; if (unlikely(anchor->poisoned)) atomic_inc(&urb->reject); spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_anchor_urb); static int usb_anchor_check_wakeup(struct usb_anchor *anchor) { return atomic_read(&anchor->suspend_wakeups) == 0 && list_empty(&anchor->urb_list); } /* Callers must hold anchor->lock */ static void __usb_unanchor_urb(struct urb *urb, struct usb_anchor *anchor) { urb->anchor = NULL; list_del(&urb->anchor_list); usb_put_urb(urb); if (usb_anchor_check_wakeup(anchor)) wake_up(&anchor->wait); } /** * usb_unanchor_urb - unanchors an URB * @urb: pointer to the urb to unanchor * * Call this to stop the system keeping track of this URB. */ void usb_unanchor_urb(struct urb *urb) { unsigned long flags; struct usb_anchor *anchor; if (!urb) return; anchor = urb->anchor; if (!anchor) return; spin_lock_irqsave(&anchor->lock, flags); /* * At this point, we could be competing with another thread which * has the same intention. To protect the urb from being unanchored * twice, only the winner of the race gets the job. */ if (likely(anchor == urb->anchor)) __usb_unanchor_urb(urb, anchor); spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_unanchor_urb); /*-------------------------------------------------------------------*/ static const int pipetypes[4] = { PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT }; /** * usb_pipe_type_check - sanity check of a specific pipe for a usb device * @dev: struct usb_device to be checked * @pipe: pipe to check * * This performs a light-weight sanity check for the endpoint in the * given usb device. It returns 0 if the pipe is valid for the specific usb * device, otherwise a negative error code. */ int usb_pipe_type_check(struct usb_device *dev, unsigned int pipe) { const struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(dev, pipe); if (!ep) return -EINVAL; if (usb_pipetype(pipe) != pipetypes[usb_endpoint_type(&ep->desc)]) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(usb_pipe_type_check); /** * usb_urb_ep_type_check - sanity check of endpoint in the given urb * @urb: urb to be checked * * This performs a light-weight sanity check for the endpoint in the * given urb. It returns 0 if the urb contains a valid endpoint, otherwise * a negative error code.
*/ int usb_urb_ep_type_check(const struct urb *urb) { return usb_pipe_type_check(urb->dev, urb->pipe); } EXPORT_SYMBOL_GPL(usb_urb_ep_type_check); /** * usb_submit_urb - issue an asynchronous transfer request for an endpoint * @urb: pointer to the urb describing the request * @mem_flags: the type of memory to allocate, see kmalloc() for a list * of valid options for this. * * This submits a transfer request, and transfers control of the URB * describing that request to the USB subsystem. Request completion will * be indicated later, asynchronously, by calling the completion handler. * The three types of completion are success, error, and unlink * (a software-induced fault, also called "request cancellation"). * * URBs may be submitted in interrupt context. * * The caller must have correctly initialized the URB before submitting * it. Functions such as usb_fill_bulk_urb() and usb_fill_control_urb() are * available to ensure that most fields are correctly initialized, for * the particular kind of transfer, although they will not initialize * any transfer flags. * * If the submission is successful, the complete() callback from the URB * will be called exactly once, when the USB core and Host Controller Driver * (HCD) are finished with the URB. When the completion function is called, * control of the URB is returned to the device driver which issued the * request. The completion handler may then immediately free or reuse that * URB. * * With few exceptions, USB device drivers should never access URB fields * provided by usbcore or the HCD until its complete() is called. * The exceptions relate to periodic transfer scheduling. For both * interrupt and isochronous urbs, as part of successful URB submission * urb->interval is modified to reflect the actual transfer period used * (normally some power of two units). And for isochronous urbs, * urb->start_frame is modified to reflect when the URB's transfers were * scheduled to start. * * Not all isochronous transfer scheduling policies will work, but most * host controller drivers should easily handle ISO queues going from now * until 10-200 msec into the future. Drivers should try to keep at * least one or two msec of data in the queue; many controllers require * that new transfers start at least 1 msec in the future when they are * added. If the driver is unable to keep up and the queue empties out, * the behavior for new submissions is governed by the URB_ISO_ASAP flag. * If the flag is set, or if the queue is idle, then the URB is always * assigned to the first available (and not yet expired) slot in the * endpoint's schedule. If the flag is not set and the queue is active * then the URB is always assigned to the next slot in the schedule * following the end of the endpoint's previous URB, even if that slot is * in the past. When a packet is assigned in this way to a slot that has * already expired, the packet is not transmitted and the corresponding * usb_iso_packet_descriptor's status field will return -EXDEV. If this * would happen to all the packets in the URB, submission fails with a * -EXDEV error code. * * For control endpoints, the synchronous usb_control_msg() call is * often used (in non-interrupt context) instead of this call. * That is often used through convenience wrappers, for the requests * that are standardized in the USB 2.0 specification. For bulk * endpoints, a synchronous usb_bulk_msg() call is available. * * Return: * 0 on successful submissions. A negative error number otherwise. 
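 *
 * A minimal, hypothetical bulk-IN submission sketch (the device, endpoint
 * address, buffer and completion handler names are illustrative only):
 *
 *	urb = usb_alloc_urb(0, GFP_KERNEL);
 *	if (!urb)
 *		return -ENOMEM;
 *	usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, ep_addr),
 *			  buf, len, my_complete_fn, ctx);
 *	retval = usb_submit_urb(urb, GFP_KERNEL);
 *	if (retval)
 *		usb_free_urb(urb);	(never submitted: drop our reference)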
* * Request Queuing: * * URBs may be submitted to endpoints before previous ones complete, to * minimize the impact of interrupt latencies and system overhead on data * throughput. With that queuing policy, an endpoint's queue would never * be empty. This is required for continuous isochronous data streams, * and may also be required for some kinds of interrupt transfers. Such * queuing also maximizes bandwidth utilization by letting USB controllers * start work on later requests before driver software has finished the * completion processing for earlier (successful) requests. * * As of Linux 2.6, all USB endpoint transfer queues support depths greater * than one. This was previously an HCD-specific behavior, except for ISO * transfers. Non-isochronous endpoint queues are inactive during cleanup * after faults (transfer errors or cancellation). * * Reserved Bandwidth Transfers: * * Periodic transfers (interrupt or isochronous) are performed repeatedly, * using the interval specified in the urb. Submitting the first urb to * the endpoint reserves the bandwidth necessary to make those transfers. * If the USB subsystem can't allocate sufficient bandwidth to perform * the periodic request, submitting such a periodic request should fail. * * For devices under xHCI, the bandwidth is reserved at configuration time, or * when the alt setting is selected. If there is not enough bus bandwidth, the * configuration/alt setting request will fail. Therefore, submissions to * periodic endpoints on devices under xHCI should never fail due to bandwidth * constraints. * * Device drivers must explicitly request that repetition, by ensuring that * some URB is always on the endpoint's queue (except possibly for short * periods during completion callbacks). When there is no longer an urb * queued, the endpoint's bandwidth reservation is canceled. This means * drivers can use their completion handlers to ensure they keep bandwidth * they need, by reinitializing and resubmitting the just-completed urb * until the driver no longer needs that periodic bandwidth. * * Memory Flags: * * The general rules for how to decide which mem_flags to use * are the same as for kmalloc. There are four * different possible values: GFP_KERNEL, GFP_NOFS, GFP_NOIO and * GFP_ATOMIC. * * GFP_NOFS is not ever used, as it has not been implemented yet. * * GFP_ATOMIC is used when * (a) you are inside a completion handler, an interrupt, bottom half, * tasklet or timer, or * (b) you are holding a spinlock or rwlock (does not apply to * semaphores), or * (c) current->state != TASK_RUNNING, this is the case only after * you've changed it. * * GFP_NOIO is used in the block io path and error handling of storage * devices. * * All other situations use GFP_KERNEL.
* * Some more specific rules for mem_flags can be inferred, such as * (1) start_xmit, timeout, and receive methods of network drivers must * use GFP_ATOMIC (they are called with a spinlock held); * (2) queuecommand methods of scsi drivers must use GFP_ATOMIC (also * called with a spinlock held); * (3) If you use a kernel thread with a network driver you must use * GFP_NOIO, unless (b) or (c) apply; * (4) after you have done a down() you can use GFP_KERNEL, unless (b) or (c) * apply or you are in a storage driver's block io path; * (5) USB probe and disconnect can use GFP_KERNEL unless (b) or (c) apply; and * (6) changing firmware on a running storage or net device uses * GFP_NOIO, unless (b) or (c) apply. * */ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) { int xfertype, max; struct usb_device *dev; struct usb_host_endpoint *ep; int is_out; unsigned int allowed; bool is_eusb2_isoch_double; if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { WARN_ONCE(1, "URB %p submitted while active\n", urb); return -EBUSY; } dev = urb->dev; if ((!dev) || (dev->state < USB_STATE_UNAUTHENTICATED)) return -ENODEV; /* For now, get the endpoint from the pipe. Eventually drivers * will be required to set urb->ep directly and we will eliminate * urb->pipe. */ ep = usb_pipe_endpoint(dev, urb->pipe); if (!ep) return -ENOENT; urb->ep = ep; urb->status = -EINPROGRESS; urb->actual_length = 0; /* Lots of sanity checks, so HCDs can rely on clean data * and don't need to duplicate tests */ xfertype = usb_endpoint_type(&ep->desc); if (xfertype == USB_ENDPOINT_XFER_CONTROL) { struct usb_ctrlrequest *setup = (struct usb_ctrlrequest *) urb->setup_packet; if (!setup) return -ENOEXEC; is_out = !(setup->bRequestType & USB_DIR_IN) || !setup->wLength; dev_WARN_ONCE(&dev->dev, (usb_pipeout(urb->pipe) != is_out), "BOGUS control dir, pipe %x doesn't match bRequestType %x\n", urb->pipe, setup->bRequestType); if (le16_to_cpu(setup->wLength) != urb->transfer_buffer_length) { dev_dbg(&dev->dev, "BOGUS control len %d doesn't match transfer length %d\n", le16_to_cpu(setup->wLength), urb->transfer_buffer_length); return -EBADR; } } else { is_out = usb_endpoint_dir_out(&ep->desc); } /* Clear the internal flags and cache the direction for later use */ urb->transfer_flags &= ~(URB_DIR_MASK | URB_DMA_MAP_SINGLE | URB_DMA_MAP_PAGE | URB_DMA_MAP_SG | URB_MAP_LOCAL | URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL | URB_DMA_SG_COMBINED); urb->transfer_flags |= (is_out ? URB_DIR_OUT : URB_DIR_IN); kmsan_handle_urb(urb, is_out); if (xfertype != USB_ENDPOINT_XFER_CONTROL && dev->state < USB_STATE_CONFIGURED) return -ENODEV; max = usb_endpoint_maxp(&ep->desc); is_eusb2_isoch_double = usb_endpoint_is_hs_isoc_double(dev, ep); if (!max && !is_eusb2_isoch_double) { dev_dbg(&dev->dev, "bogus endpoint ep%d%s in %s (bad maxpacket %d)\n", usb_endpoint_num(&ep->desc), is_out ? "out" : "in", __func__, max); return -EMSGSIZE; } /* periodic transfers limit size per frame/uframe, * but drivers only control those sizes for ISO. * while we're checking, initialize return status.
*/ if (xfertype == USB_ENDPOINT_XFER_ISOC) { int n, len; /* SuperSpeed isoc endpoints have up to 16 bursts of up to * 3 packets each */ if (dev->speed >= USB_SPEED_SUPER) { int burst = 1 + ep->ss_ep_comp.bMaxBurst; int mult = USB_SS_MULT(ep->ss_ep_comp.bmAttributes); max *= burst; max *= mult; } if (dev->speed == USB_SPEED_SUPER_PLUS && USB_SS_SSP_ISOC_COMP(ep->ss_ep_comp.bmAttributes)) { struct usb_ssp_isoc_ep_comp_descriptor *isoc_ep_comp; isoc_ep_comp = &ep->ssp_isoc_ep_comp; max = le32_to_cpu(isoc_ep_comp->dwBytesPerInterval); } /* High speed, 1-3 packets/uframe, max 6 for eUSB2 double bw */ if (dev->speed == USB_SPEED_HIGH) { if (is_eusb2_isoch_double) max = le32_to_cpu(ep->eusb2_isoc_ep_comp.dwBytesPerInterval); else max *= usb_endpoint_maxp_mult(&ep->desc); } if (urb->number_of_packets <= 0) return -EINVAL; for (n = 0; n < urb->number_of_packets; n++) { len = urb->iso_frame_desc[n].length; if (len < 0 || len > max) return -EMSGSIZE; urb->iso_frame_desc[n].status = -EXDEV; urb->iso_frame_desc[n].actual_length = 0; } } else if (urb->num_sgs && !urb->dev->bus->no_sg_constraint) { struct scatterlist *sg; int i; for_each_sg(urb->sg, sg, urb->num_sgs - 1, i) if (sg->length % max) return -EINVAL; } /* the I/O buffer must be mapped/unmapped, except when length=0 */ if (urb->transfer_buffer_length > INT_MAX) return -EMSGSIZE; /* * stuff that drivers shouldn't do, but which shouldn't * cause problems in HCDs if they get it wrong. */ /* Check that the pipe's type matches the endpoint's type */ if (usb_pipe_type_check(urb->dev, urb->pipe)) dev_warn_once(&dev->dev, "BOGUS urb xfer, pipe %x != type %x\n", usb_pipetype(urb->pipe), pipetypes[xfertype]); /* Check against a simple/standard policy */ allowed = (URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT | URB_DIR_MASK | URB_FREE_BUFFER); switch (xfertype) { case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: if (is_out) allowed |= URB_ZERO_PACKET; fallthrough; default: /* all non-iso endpoints */ if (!is_out) allowed |= URB_SHORT_NOT_OK; break; case USB_ENDPOINT_XFER_ISOC: allowed |= URB_ISO_ASAP; break; } allowed &= urb->transfer_flags; /* warn if submitter gave bogus flags */ if (allowed != urb->transfer_flags) dev_WARN(&dev->dev, "BOGUS urb flags, %x --> %x\n", urb->transfer_flags, allowed); /* * Force periodic transfer intervals to be legal values that are * a power of two (so HCDs don't need to). * * FIXME want bus->{intr,iso}_sched_horizon values here. Each HC * supports different values... this uses EHCI/UHCI defaults (and * EHCI can use smaller non-default values). */ switch (xfertype) { case USB_ENDPOINT_XFER_ISOC: case USB_ENDPOINT_XFER_INT: /* too small? */ if (urb->interval <= 0) return -EINVAL; /* too big? 
*/ switch (dev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: /* units are 125us */ /* Handle up to 2^(16-1) microframes */ if (urb->interval > (1 << 15)) return -EINVAL; max = 1 << 15; break; case USB_SPEED_HIGH: /* units are microframes */ /* NOTE usb handles 2^15 */ if (urb->interval > (1024 * 8)) urb->interval = 1024 * 8; max = 1024 * 8; break; case USB_SPEED_FULL: /* units are frames/msec */ case USB_SPEED_LOW: if (xfertype == USB_ENDPOINT_XFER_INT) { if (urb->interval > 255) return -EINVAL; /* NOTE ohci only handles up to 32 */ max = 128; } else { if (urb->interval > 1024) urb->interval = 1024; /* NOTE usb and ohci handle up to 2^15 */ max = 1024; } break; default: return -EINVAL; } /* Round down to a power of 2, no more than max */ urb->interval = min(max, 1 << ilog2(urb->interval)); } return usb_hcd_submit_urb(urb, mem_flags); } EXPORT_SYMBOL_GPL(usb_submit_urb); /*-------------------------------------------------------------------*/ /** * usb_unlink_urb - abort/cancel a transfer request for an endpoint * @urb: pointer to urb describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. URBs complete only once * per submission, and may be canceled only once per submission. * Successful cancellation means termination of @urb will be expedited * and the completion handler will be called with a status code * indicating that the request has been canceled (rather than any other * code). * * Drivers should not call this routine or related routines, such as * usb_kill_urb(), after their disconnect method has returned. The * disconnect function should synchronize with a driver's I/O routines * to ensure that all URB-related activity has completed before it returns. * * This request is asynchronous; however, the HCD might call the ->complete() * callback during unlink. Therefore when drivers call usb_unlink_urb(), they * must not hold any locks that may be taken by the completion function. * Success is indicated by returning -EINPROGRESS, at which time the URB will * probably not yet have been given back to the device driver. When it is * eventually called, the completion function will see @urb->status == * -ECONNRESET. * Failure is indicated by usb_unlink_urb() returning any other value. * Unlinking will fail when @urb is not currently "linked" (i.e., it was * never submitted, or it was unlinked before, or the hardware is already * finished with it), even if the completion handler has not yet run. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must ensure that the * completion handler cannot deallocate the URB. * * Return: -EINPROGRESS on success. See description for other values on * failure. * * Unlinking and Endpoint Queues: * * [The behaviors and guarantees described below do not apply to virtual * root hubs but only to endpoint queues for physical USB devices.] * * Host Controller Drivers (HCDs) place all the URBs for a particular * endpoint in a queue. Normally the queue advances as the controller * hardware processes each request. But when an URB terminates with an * error its queue generally stops (see below), at least until that URB's * completion routine returns. It is guaranteed that a stopped queue * will not restart until all its unlinked URBs have been fully retired, * with their completion routines run, even if that's not until some time * after the original completion handler returns.
The same behavior and * guarantee apply when an URB terminates because it was unlinked. * * Bulk and interrupt endpoint queues are guaranteed to stop whenever an * URB terminates with any sort of error, including -ECONNRESET, -ENOENT, * and -EREMOTEIO. Control endpoint queues behave the same way except * that they are not guaranteed to stop for -EREMOTEIO errors. Queues * for isochronous endpoints are treated differently, because they must * advance at fixed rates. Such queues do not stop when an URB * encounters an error or is unlinked. An unlinked isochronous URB may * leave a gap in the stream of packets; it is undefined whether such * gaps can be filled in. * * Note that early termination of an URB because a short packet was * received will generate a -EREMOTEIO error if and only if the * URB_SHORT_NOT_OK flag is set. By setting this flag, USB device * drivers can build deep queues for large or complex bulk transfers * and clean them up reliably after any sort of aborted transfer by * unlinking all pending URBs at the first fault. * * When a control URB terminates with an error other than -EREMOTEIO, it * is quite likely that the status stage of the transfer will not take * place. */ int usb_unlink_urb(struct urb *urb) { if (!urb) return -EINVAL; if (!urb->dev) return -ENODEV; if (!urb->ep) return -EIDRM; return usb_hcd_unlink_urb(urb, -ECONNRESET); } EXPORT_SYMBOL_GPL(usb_unlink_urb); /** * usb_kill_urb - cancel a transfer request and wait for it to finish * @urb: pointer to URB describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. It is guaranteed that * upon return all completion handlers will have finished and the URB * will be totally idle and available for reuse. These features make * this an ideal way to stop I/O in a disconnect() callback or close() * function. If the request has not already finished or been unlinked * the completion handler will see urb->status == -ENOENT. * * While the routine is running, attempts to resubmit the URB will fail * with error -EPERM. Thus even if the URB's completion handler always * tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must ensure that the * completion handler cannot deallocate the URB. * * This routine may not be used in an interrupt context (such as a bottom * half or a completion handler), or when holding a spinlock, or in other * situations where the caller can't schedule(). * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_kill_urb(struct urb *urb) { might_sleep(); if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); /* * Order the write of urb->reject above before the read * of urb->use_count below. Pairs with the barriers in * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). */ smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); atomic_dec(&urb->reject); } EXPORT_SYMBOL_GPL(usb_kill_urb); /** * usb_poison_urb - reliably kill a transfer and prevent further use of an URB * @urb: pointer to URB describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. It is guaranteed that * upon return all completion handlers will have finished and the URB * will be totally idle and cannot be reused.
These features make * this an ideal way to stop I/O in a disconnect() callback. * If the request has not already finished or been unlinked * the completion handler will see urb->status == -ENOENT. * * While the routine runs and after it has run, attempts to resubmit the URB * will fail with error -EPERM. Thus even if the URB's completion handler * always tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must ensure that the * completion handler cannot deallocate the URB. * * This routine may not be used in an interrupt context (such as a bottom * half or a completion handler), or when holding a spinlock, or in other * situations where the caller can't schedule(). * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_poison_urb(struct urb *urb) { might_sleep(); if (!urb) return; atomic_inc(&urb->reject); /* * Order the write of urb->reject above before the read * of urb->use_count below. Pairs with the barriers in * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). */ smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); } EXPORT_SYMBOL_GPL(usb_poison_urb); void usb_unpoison_urb(struct urb *urb) { if (!urb) return; atomic_dec(&urb->reject); } EXPORT_SYMBOL_GPL(usb_unpoison_urb); /** * usb_block_urb - reliably prevent further use of an URB * @urb: pointer to URB to be blocked, may be NULL * * After the routine has run, attempts to resubmit the URB will fail * with error -EPERM. Thus even if the URB's completion handler always * tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must ensure that the * completion handler cannot deallocate the URB. */ void usb_block_urb(struct urb *urb) { if (!urb) return; atomic_inc(&urb->reject); } EXPORT_SYMBOL_GPL(usb_block_urb); /** * usb_kill_anchored_urbs - kill all URBs associated with an anchor * @anchor: anchor the requests are bound to * * This kills all outstanding URBs starting from the back of the queue, * with the guarantee that no completion callbacks will take place from the * anchor after this function returns. * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_kill_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; int surely_empty; do { spin_lock_irq(&anchor->lock); while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); /* make sure the URB isn't freed before we kill it */ usb_get_urb(victim); spin_unlock_irq(&anchor->lock); /* this will unanchor the URB */ usb_kill_urb(victim); usb_put_urb(victim); spin_lock_irq(&anchor->lock); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irq(&anchor->lock); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_kill_anchored_urbs); /** * usb_poison_anchored_urbs - cease all traffic from an anchor * @anchor: anchor the requests are bound to * * This poisons all outstanding URBs, starting from the back of the * queue. Newly added URBs will also be poisoned. * * This routine should not be called by a driver after its disconnect * method has returned.
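 *
 * An illustrative pairing, assuming the driver keeps its in-flight URBs on
 * a single anchor: call usb_poison_anchored_urbs(&mydev->submitted) in the
 * suspend or pre-reset path, and usb_unpoison_anchored_urbs() on the same
 * anchor once traffic may start again.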
*/ void usb_poison_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; int surely_empty; do { spin_lock_irq(&anchor->lock); anchor->poisoned = 1; while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); /* make sure the URB isn't freed before we kill it */ usb_get_urb(victim); spin_unlock_irq(&anchor->lock); /* this will unanchor the URB */ usb_poison_urb(victim); usb_put_urb(victim); spin_lock_irq(&anchor->lock); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irq(&anchor->lock); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_poison_anchored_urbs); /** * usb_unpoison_anchored_urbs - let an anchor be used successfully again * @anchor: anchor the requests are bound to * * Reverses the effect of usb_poison_anchored_urbs(); * the anchor can be used normally after it returns. */ void usb_unpoison_anchored_urbs(struct usb_anchor *anchor) { unsigned long flags; struct urb *lazarus; spin_lock_irqsave(&anchor->lock, flags); list_for_each_entry(lazarus, &anchor->urb_list, anchor_list) { usb_unpoison_urb(lazarus); } anchor->poisoned = 0; spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_unpoison_anchored_urbs); /** * usb_anchor_suspend_wakeups * @anchor: the anchor you want to suspend wakeups on * * Call this to stop the last urb being unanchored from waking up any * usb_wait_anchor_empty_timeout waiters. This is used in the hcd urb * giveback path to delay waking up until after the completion handler * has run. */ void usb_anchor_suspend_wakeups(struct usb_anchor *anchor) { if (anchor) atomic_inc(&anchor->suspend_wakeups); } EXPORT_SYMBOL_GPL(usb_anchor_suspend_wakeups); /** * usb_anchor_resume_wakeups * @anchor: the anchor you want to resume wakeups on * * Allow usb_wait_anchor_empty_timeout waiters to be woken up again, and * wake up any current waiters if the anchor is empty. */ void usb_anchor_resume_wakeups(struct usb_anchor *anchor) { if (!anchor) return; atomic_dec(&anchor->suspend_wakeups); if (usb_anchor_check_wakeup(anchor)) wake_up(&anchor->wait); } EXPORT_SYMBOL_GPL(usb_anchor_resume_wakeups); /** * usb_wait_anchor_empty_timeout - wait for an anchor to be unused * @anchor: the anchor you want to become unused * @timeout: how long you are willing to wait in milliseconds * * Call this if you want to be sure all of an anchor's * URBs have finished. * * Return: Non-zero if the anchor became unused. Zero on timeout. */ int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor, unsigned int timeout) { return wait_event_timeout(anchor->wait, usb_anchor_check_wakeup(anchor), msecs_to_jiffies(timeout)); } EXPORT_SYMBOL_GPL(usb_wait_anchor_empty_timeout); /** * usb_get_from_anchor - get an anchor's oldest urb * @anchor: the anchor whose urb you want * * This will take the oldest urb from an anchor, * unanchor it and return it. * * Return: The oldest urb from @anchor, or %NULL if @anchor has no * urbs associated with it.
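 *
 * A sketch of a typical drain loop; since this function takes a reference
 * on the urb it returns, the caller must drop that reference:
 *
 *	while ((urb = usb_get_from_anchor(anchor)) != NULL) {
 *		usb_kill_urb(urb);
 *		usb_put_urb(urb);
 *	}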
*/ struct urb *usb_get_from_anchor(struct usb_anchor *anchor) { struct urb *victim; unsigned long flags; spin_lock_irqsave(&anchor->lock, flags); if (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.next, struct urb, anchor_list); usb_get_urb(victim); __usb_unanchor_urb(victim, anchor); } else { victim = NULL; } spin_unlock_irqrestore(&anchor->lock, flags); return victim; } EXPORT_SYMBOL_GPL(usb_get_from_anchor); /** * usb_scuttle_anchored_urbs - unanchor all of an anchor's urbs * @anchor: the anchor whose urbs you want to unanchor * * Use this to get rid of all of an anchor's urbs. */ void usb_scuttle_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; unsigned long flags; int surely_empty; do { spin_lock_irqsave(&anchor->lock, flags); while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); __usb_unanchor_urb(victim, anchor); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irqrestore(&anchor->lock, flags); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_scuttle_anchored_urbs); /** * usb_anchor_empty - is an anchor empty * @anchor: the anchor you want to query * * Return: 1 if the anchor has no urbs associated with it. */ int usb_anchor_empty(struct usb_anchor *anchor) { return list_empty(&anchor->urb_list); } EXPORT_SYMBOL_GPL(usb_anchor_empty);
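/*
 * A minimal sketch of how the anchor API above combines with
 * usb_submit_urb(); the function names, the endpoint number and the buffer
 * handling are hypothetical, not taken from a real driver:
 *
 *	static void example_complete(struct urb *urb)
 *	{
 *		// the URB was already unanchored during giveback
 *	}
 *
 *	static int example_submit(struct usb_device *udev,
 *				  struct usb_anchor *anchor,
 *				  void *buf, int len)
 *	{
 *		struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
 *		int ret;
 *
 *		if (!urb)
 *			return -ENOMEM;
 *		usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, 1),
 *				  buf, len, example_complete, NULL);
 *		usb_anchor_urb(urb, anchor);
 *		ret = usb_submit_urb(urb, GFP_KERNEL);
 *		if (ret)
 *			usb_unanchor_urb(urb);
 *		usb_free_urb(urb);	// anchor and HCD hold their own refs
 *		return ret;
 *	}
 */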
// SPDX-License-Identifier: GPL-2.0-or-later /* * Syncookies implementation for the Linux kernel * * Copyright (C) 1997 Andi Kleen * Based on ideas by D.J.Bernstein and Eric Schenk. */ #include <linux/tcp.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/export.h> #include <net/secure_seq.h> #include <net/tcp.h> #include <net/tcp_ecn.h> #include <net/route.h> static siphash_aligned_key_t syncookie_secret[2]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) /* TCP Timestamp: 6 lowest bits of timestamp sent in the cookie SYN-ACK * store TCP options: * * MSB LSB * | 31 ... 6 | 5 | 4 | 3 2 1 0 | * | Timestamp | ECN | SACK | WScale | * * When we receive a valid cookie-ACK, we look at the echoed tsval (if * any) to figure out which TCP options we should use for the rebuilt * connection. * * A WScale setting of '0xf' (which is an invalid scaling value) * means that the original SYN did not include the TCP window scaling option. */ #define TS_OPT_WSCALE_MASK 0xf #define TS_OPT_SACK BIT(4) #define TS_OPT_ECN BIT(5) /* There is no TS_OPT_TIMESTAMP: * if ACK contains timestamp option, we already know it was * requested/supported by the syn/synack exchange.
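 *
 * Worked example: a connection that negotiated wscale 7, SACK and ECN is
 * encoded in the six low timestamp bits as 0x7 | BIT(4) | BIT(5) = 0x37.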
*/ #define TSBITS 6 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) { net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); return siphash_4u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, count, &syncookie_secret[c]); } /* * when syncookies are in effect and tcp timestamps are enabled we encode * tcp options in the lower bits of the timestamp value that will be * sent in the syn-ack. * Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the resulting initial timestamp is <= tcp_time_stamp. */ u64 cookie_init_timestamp(struct request_sock *req, u64 now) { const struct inet_request_sock *ireq = inet_rsk(req); u64 ts, ts_now = tcp_ns_to_ts(false, now); u32 options = 0; options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; if (ireq->sack_ok) options |= TS_OPT_SACK; if (ireq->ecn_ok) options |= TS_OPT_ECN; ts = (ts_now >> TSBITS) << TSBITS; ts |= options; if (ts > ts_now) ts -= (1UL << TSBITS); if (tcp_rsk(req)->req_usec_ts) return ts * NSEC_PER_USEC; return ts * NSEC_PER_MSEC; } static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, __u32 sseq, __u32 data) { /* * Compute the secure sequence number. * The output should be: * HASH(sec1,saddr,sport,daddr,dport,sec1) + sseq + (count * 2^24) * + (HASH(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24). * Where sseq is their sequence number and count increases every * minute by 1. * As an extra hack, we add a small "data" value that encodes the * MSS into the second hash value. */ u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) & COOKIEMASK)); } /* * This retrieves the small "data" value from the syncookie. * If the syncookie is bad, the data returned will be out of * range. This must be checked by the caller. * * The count value used to generate the cookie must be less than * MAX_SYNCOOKIE_AGE minutes in the past. * The return value is (__u32)-1 if this test fails. */ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, __be16 sport, __be16 dport, __u32 sseq) { u32 diff, count = tcp_cookie_time(); /* Strip away the layers from the cookie */ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) & COOKIEMASK; /* Leaving the data behind */ } /* * MSS Values are chosen based on the 2011 paper * 'An Analysis of TCP Maximum Segment Sizes' by S. Alcock and R. Nelson. * Values .. * .. lower than 536 are rare (< 0.2%) * .. between 537 and 1299 account for less than 1.5% of observed values * .. in the 1300-1349 range account for about 15 to 20% of observed mss values * .. exceeding 1460 are very rare (< 0.04%) * * 1460 is the single most frequently announced mss value (30 to 46% depending * on monitor location). Table must be sorted. */ static __u16 const msstab[] = { 536, 1300, 1440, /* 1440, 1452: PPPoE */ 1460, }; /* * Generate a syncookie. mssp points to the mss, which is returned * rounded down to the value encoded in the cookie.
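 *
 * For example, an advertised mss of 1400 scans msstab[] from the highest
 * entry down and first satisfies 1400 >= 1300, so index 1 is encoded into
 * the cookie and *mssp is rounded down to 1300.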
*/ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp) { int mssind; const __u16 mss = *mssp; for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) if (mss >= msstab[mssind]) break; *mssp = msstab[mssind]; return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); return __cookie_v4_init_sequence(iph, th, mssp); } /* * Check if an ACK sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th) { __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; __u32 mssind; mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; bool own_req; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, NULL, &own_req); if (child) { refcount_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { reqsk_put(req); return child; } if (inet_csk_reqsk_queue_add(sk, req, child)) return child; bh_unlock_sock(child); sock_put(child); } __reqsk_free(req); return NULL; } EXPORT_IPV6_MOD(tcp_get_cookie_sock); /* * when syncookies are in effect and tcp timestamps are enabled we store * additional tcp options in the timestamp. * This extracts these options from the timestamp echo. * * Return false if we decode a tcp option that is disabled * on the host. */ bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *tcp_opt) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr; if (!tcp_opt->saw_tstamp) { tcp_clear_options(tcp_opt); return true; } if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ?
TCP_SACK_SEEN : 0; if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) return true; /* no window scaling */ tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_IPV6_MOD(cookie_timestamp_decode); static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); const struct tcphdr *th = tcp_hdr(skb); req->num_retrans = 0; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; ireq->ir_iif = inet_request_bound_dev_if(sk, skb); ireq->ir_mark = inet_request_mark(sk, skb); if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; treq->snt_synack = 0; treq->snt_tsval_first = 0; treq->tfo_listener = false; treq->txhash = net_tx_rndhash(); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); if (treq->is_mptcp) return mptcp_subflow_init_cookie_req(req, sk, skb); #endif return 0; } #if IS_ENABLED(CONFIG_BPF) struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) { struct request_sock *req = inet_reqsk(skb->sk); skb->sk = NULL; skb->destructor = NULL; if (cookie_tcp_reqsk_init(sk, skb, req)) { reqsk_free(req); req = NULL; } return req; } EXPORT_IPV6_MOD_GPL(cookie_bpf_check); #endif struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb, struct tcp_options_received *tcp_opt, int mss, u32 tsoff) { struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct request_sock *req; if (sk_is_mptcp(sk)) req = mptcp_subflow_reqsk_alloc(ops, sk, false); else req = inet_reqsk_alloc(ops, sk, false); if (!req) return NULL; if (cookie_tcp_reqsk_init(sk, skb, req)) { reqsk_free(req); return NULL; } ireq = inet_rsk(req); treq = tcp_rsk(req); req->mss = mss; req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0; ireq->snd_wscale = tcp_opt->snd_wscale; ireq->tstamp_ok = tcp_opt->saw_tstamp; ireq->sack_ok = tcp_opt->sack_ok; ireq->wscale_ok = tcp_opt->wscale_ok; ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); treq->ts_off = tsoff; return req; } EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; u32 tsoff = 0; int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb)); if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb, &tcp_opt, mss, tsoff); out: return ERR_PTR(-EINVAL); } /* On input, sk is a listener. * Output is the listener if the incoming packet would not create a child, * or NULL if memory could not be allocated.
*/ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct net *net = sock_net(sk); struct tcp_request_sock *treq; struct request_sock *req; struct sock *ret = sk; struct flowi4 fl4; struct rtable *rt; __u8 rcv_wscale; int full_space; SKB_DR(reason); if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; if (cookie_bpf_ok(skb)) { req = cookie_bpf_check(sk, skb); } else { req = cookie_tcp_check(net, sk, skb); if (IS_ERR(req)) goto out; } if (!req) { SKB_DR_SET(reason, NO_SOCKET); goto out_drop; } ireq = inet_rsk(req); treq = tcp_rsk(req); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); /* We threw the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); if (security_inet_conn_request(sk, skb, req)) { SKB_DR_SET(reason, SECURITY_HOOK); goto out_free; } tcp_ao_syncookie(sk, skb, req, AF_INET); /* * We need to look up the route here to get at the correct * window size. We should make sure that the window size * hasn't changed since we received the original syn, but I see * no easy way to do this. */ flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { SKB_DR_SET(reason, IP_OUTNOROUTES); goto out_free; } /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? : dst_metric(&rt->dst, RTAX_WINDOW); /* limit the window selection if the user enforces a smaller rx buffer */ full_space = tcp_full_space(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); /* req->syncookie is set to true only if the ACK is validated * by a BPF kfunc, in which case rcv_wscale is already configured. */ if (!req->syncookie) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst); treq->accecn_ok = ireq->ecn_ok && cookie_accecn_ok(th); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being set up * Normal sockets get it right from inet_csk_route_child_sock() */ if (!ret) { SKB_DR_SET(reason, NO_SOCKET); goto out_drop; } inet_sk(ret)->cork.fl.u.ip4 = fl4; out: return ret; out_free: reqsk_free(req); out_drop: sk_skb_reason_drop(sk, skb, reason); return NULL; }
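/*
 * Putting the pieces above together, the life of an IPv4 syncookie is
 * roughly:
 *
 *	SYN arrives while the queue is full:
 *		isn = __cookie_v4_init_sequence(iph, th, &mss);
 *		(a SYN-ACK is sent with seq == isn, options folded into tsval)
 *	the final ACK arrives later:
 *		mss = __cookie_v4_check(iph, th);	// 0 means invalid
 *		cookie_timestamp_decode() recovers wscale/SACK/ECN from tsecr
 *		cookie_v4_check() rebuilds the request sock and creates the
 *		child socket
 */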
// SPDX-License-Identifier: GPL-2.0-only /*
* SCSI RDMA (SRP) transport class * * Copyright (C) 2007 FUJITA Tomonori <tomof@acm.org> */ #include <linux/init.h> #include <linux/module.h> #include <linux/jiffies.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/string.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_srp.h> #include "scsi_priv.h" struct srp_host_attrs { atomic_t next_port_id; }; #define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) #define SRP_HOST_ATTRS 0 #define SRP_RPORT_ATTRS 8 struct srp_internal { struct scsi_transport_template t; struct srp_function_template *f; struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1]; struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1]; struct transport_container rport_attr_cont; }; static int scsi_is_srp_rport(const struct device *dev); #define to_srp_internal(tmpl) container_of(tmpl, struct srp_internal, t) #define dev_to_rport(d) container_of(d, struct srp_rport, dev) #define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent) static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) { return dev_to_shost(r->dev.parent); } static int find_child_rport(struct device *dev, void *data) { struct device **child = data; if (scsi_is_srp_rport(dev)) { WARN_ON_ONCE(*child); *child = dev; } return 0; } static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost) { struct device *child = NULL; WARN_ON_ONCE(device_for_each_child(&shost->shost_gendev, &child, find_child_rport) < 0); return child ? dev_to_rport(child) : NULL; } /** * srp_tmo_valid() - check timeout combination validity * @reconnect_delay: Reconnect delay in seconds. * @fast_io_fail_tmo: Fast I/O fail timeout in seconds. * @dev_loss_tmo: Device loss timeout in seconds. * * The combination of the timeout parameters must be such that SCSI commands * are finished in a reasonable time. Hence do not allow the fast I/O fail * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT nor allow dev_loss_tmo to * exceed that limit if failing I/O fast has been disabled. Furthermore, these * parameters must be such that multipath can detect failed paths timely. * Hence do not allow all three parameters to be disabled simultaneously. 
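 *
 * For instance (illustrative values, assuming neither timeout exceeds
 * SCSI_DEVICE_BLOCK_MAX_TIMEOUT): reconnect_delay=10, fast_io_fail_tmo=15
 * and dev_loss_tmo=600 is accepted, while fast_io_fail_tmo=60 together
 * with dev_loss_tmo=60 is rejected because fast_io_fail_tmo must be
 * strictly smaller than dev_loss_tmo when both are enabled.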
*/ int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, long dev_loss_tmo) { if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0) return -EINVAL; if (reconnect_delay == 0) return -EINVAL; if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) return -EINVAL; if (fast_io_fail_tmo < 0 && dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) return -EINVAL; if (dev_loss_tmo >= LONG_MAX / HZ) return -EINVAL; if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 && fast_io_fail_tmo >= dev_loss_tmo) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(srp_tmo_valid); static int srp_host_setup(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); struct srp_host_attrs *srp_host = to_srp_host_attrs(shost); atomic_set(&srp_host->next_port_id, 0); return 0; } static DECLARE_TRANSPORT_CLASS(srp_host_class, "srp_host", srp_host_setup, NULL, NULL); static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports", NULL, NULL, NULL); static ssize_t show_srp_rport_id(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); return sprintf(buf, "%16phC\n", rport->port_id); } static DEVICE_ATTR(port_id, S_IRUGO, show_srp_rport_id, NULL); static const struct { u32 value; char *name; } srp_rport_role_names[] = { {SRP_RPORT_ROLE_INITIATOR, "SRP Initiator"}, {SRP_RPORT_ROLE_TARGET, "SRP Target"}, }; static ssize_t show_srp_rport_roles(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(srp_rport_role_names); i++) if (srp_rport_role_names[i].value == rport->roles) { name = srp_rport_role_names[i].name; break; } return sprintf(buf, "%s\n", name ? : "unknown"); } static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL); static ssize_t store_srp_rport_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct srp_rport *rport = transport_class_to_srp_rport(dev); struct Scsi_Host *shost = dev_to_shost(dev); struct srp_internal *i = to_srp_internal(shost->transportt); if (i->f->rport_delete) { i->f->rport_delete(rport); return count; } else { return -ENOSYS; } } static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete); static ssize_t show_srp_rport_state(struct device *dev, struct device_attribute *attr, char *buf) { static const char *const state_name[] = { [SRP_RPORT_RUNNING] = "running", [SRP_RPORT_BLOCKED] = "blocked", [SRP_RPORT_FAIL_FAST] = "fail-fast", [SRP_RPORT_LOST] = "lost", }; struct srp_rport *rport = transport_class_to_srp_rport(dev); enum srp_rport_state state = rport->state; return sprintf(buf, "%s\n", (unsigned)state < ARRAY_SIZE(state_name) ? state_name[state] : "???"); } static DEVICE_ATTR(state, S_IRUGO, show_srp_rport_state, NULL); static ssize_t srp_show_tmo(char *buf, int tmo) { return tmo >= 0 ? 
sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n"); } int srp_parse_tmo(int *tmo, const char *buf) { int res = 0; if (strncmp(buf, "off", 3) != 0) res = kstrtoint(buf, 0, tmo); else *tmo = -1; return res; } EXPORT_SYMBOL(srp_parse_tmo); static ssize_t show_reconnect_delay(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); return srp_show_tmo(buf, rport->reconnect_delay); } static ssize_t store_reconnect_delay(struct device *dev, struct device_attribute *attr, const char *buf, const size_t count) { struct srp_rport *rport = transport_class_to_srp_rport(dev); int res, delay; res = srp_parse_tmo(&delay, buf); if (res) goto out; res = srp_tmo_valid(delay, rport->fast_io_fail_tmo, rport->dev_loss_tmo); if (res) goto out; if (rport->reconnect_delay <= 0 && delay > 0 && rport->state != SRP_RPORT_RUNNING) { queue_delayed_work(system_long_wq, &rport->reconnect_work, delay * HZ); } else if (delay <= 0) { cancel_delayed_work(&rport->reconnect_work); } rport->reconnect_delay = delay; res = count; out: return res; } static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, show_reconnect_delay, store_reconnect_delay); static ssize_t show_failed_reconnects(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); return sprintf(buf, "%d\n", rport->failed_reconnects); } static DEVICE_ATTR(failed_reconnects, S_IRUGO, show_failed_reconnects, NULL); static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); return srp_show_tmo(buf, rport->fast_io_fail_tmo); } static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct srp_rport *rport = transport_class_to_srp_rport(dev); int res; int fast_io_fail_tmo; res = srp_parse_tmo(&fast_io_fail_tmo, buf); if (res) goto out; res = srp_tmo_valid(rport->reconnect_delay, fast_io_fail_tmo, rport->dev_loss_tmo); if (res) goto out; rport->fast_io_fail_tmo = fast_io_fail_tmo; res = count; out: return res; } static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, show_srp_rport_fast_io_fail_tmo, store_srp_rport_fast_io_fail_tmo); static ssize_t show_srp_rport_dev_loss_tmo(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_rport *rport = transport_class_to_srp_rport(dev); return srp_show_tmo(buf, rport->dev_loss_tmo); } static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct srp_rport *rport = transport_class_to_srp_rport(dev); int res; int dev_loss_tmo; res = srp_parse_tmo(&dev_loss_tmo, buf); if (res) goto out; res = srp_tmo_valid(rport->reconnect_delay, rport->fast_io_fail_tmo, dev_loss_tmo); if (res) goto out; rport->dev_loss_tmo = dev_loss_tmo; res = count; out: return res; } static DEVICE_ATTR(dev_loss_tmo, S_IRUGO | S_IWUSR, show_srp_rport_dev_loss_tmo, store_srp_rport_dev_loss_tmo); static int srp_rport_set_state(struct srp_rport *rport, enum srp_rport_state new_state) { enum srp_rport_state old_state = rport->state; lockdep_assert_held(&rport->mutex); switch (new_state) { case SRP_RPORT_RUNNING: switch (old_state) { case SRP_RPORT_LOST: goto invalid; default: break; } break; case SRP_RPORT_BLOCKED: switch (old_state) { case SRP_RPORT_RUNNING: break; default: goto invalid; } break; case SRP_RPORT_FAIL_FAST: switch (old_state) { case 
SRP_RPORT_LOST: goto invalid; default: break; } break; case SRP_RPORT_LOST: break; } rport->state = new_state; return 0; invalid: return -EINVAL; } /** * srp_reconnect_work() - reconnect and schedule a new attempt if necessary * @work: Work structure used for scheduling this operation. */ static void srp_reconnect_work(struct work_struct *work) { struct srp_rport *rport = container_of(to_delayed_work(work), struct srp_rport, reconnect_work); struct Scsi_Host *shost = rport_to_shost(rport); int delay, res; res = srp_reconnect_rport(rport); if (res != 0) { shost_printk(KERN_ERR, shost, "reconnect attempt %d failed (%d)\n", ++rport->failed_reconnects, res); delay = rport->reconnect_delay * clamp(rport->failed_reconnects - 10, 1, 100); if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, delay * HZ); } } /* * scsi_block_targets() must have been called before this function is * called to guarantee that no .queuecommand() calls are in progress. */ static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); struct srp_internal *i; lockdep_assert_held(&rport->mutex); if (srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST)) return; scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE); /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); if (i->f->terminate_rport_io) i->f->terminate_rport_io(rport); } /** * rport_fast_io_fail_timedout() - fast I/O failure timeout handler * @work: Work structure used for scheduling this operation. */ static void rport_fast_io_fail_timedout(struct work_struct *work) { struct srp_rport *rport = container_of(to_delayed_work(work), struct srp_rport, fast_io_fail_work); struct Scsi_Host *shost = rport_to_shost(rport); pr_info("fast_io_fail_tmo expired for SRP %s / %s.\n", dev_name(&rport->dev), dev_name(&shost->shost_gendev)); mutex_lock(&rport->mutex); if (rport->state == SRP_RPORT_BLOCKED) __rport_fail_io_fast(rport); mutex_unlock(&rport->mutex); } /** * rport_dev_loss_timedout() - device loss timeout handler * @work: Work structure used for scheduling this operation. 
*/ static void rport_dev_loss_timedout(struct work_struct *work) { struct srp_rport *rport = container_of(to_delayed_work(work), struct srp_rport, dev_loss_work); struct Scsi_Host *shost = rport_to_shost(rport); struct srp_internal *i = to_srp_internal(shost->transportt); pr_info("dev_loss_tmo expired for SRP %s / %s.\n", dev_name(&rport->dev), dev_name(&shost->shost_gendev)); mutex_lock(&rport->mutex); WARN_ON(srp_rport_set_state(rport, SRP_RPORT_LOST) != 0); scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE); mutex_unlock(&rport->mutex); i->f->rport_delete(rport); } static void __srp_start_tl_fail_timers(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); int delay, fast_io_fail_tmo, dev_loss_tmo; lockdep_assert_held(&rport->mutex); delay = rport->reconnect_delay; fast_io_fail_tmo = rport->fast_io_fail_tmo; dev_loss_tmo = rport->dev_loss_tmo; pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev), rport->state); if (rport->state == SRP_RPORT_LOST) return; if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, 1UL * delay * HZ); if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) && srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), rport->state); scsi_block_targets(shost, &shost->shost_gendev); if (fast_io_fail_tmo >= 0) queue_delayed_work(system_long_wq, &rport->fast_io_fail_work, 1UL * fast_io_fail_tmo * HZ); if (dev_loss_tmo >= 0) queue_delayed_work(system_long_wq, &rport->dev_loss_work, 1UL * dev_loss_tmo * HZ); } } /** * srp_start_tl_fail_timers() - start the transport layer failure timers * @rport: SRP target port. * * Start the transport layer fast I/O failure and device loss timers. Do not * modify a timer that was already started. */ void srp_start_tl_fail_timers(struct srp_rport *rport) { mutex_lock(&rport->mutex); __srp_start_tl_fail_timers(rport); mutex_unlock(&rport->mutex); } EXPORT_SYMBOL(srp_start_tl_fail_timers); /** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. * * Blocks SCSI command queueing before invoking reconnect() such that * queuecommand() won't be invoked concurrently with reconnect() from outside * the SCSI EH. This is important since a reconnect() implementation may * reallocate resources needed by queuecommand(). * * Notes: * - This function neither waits until outstanding requests have finished nor * tries to abort these. It is the responsibility of the reconnect() * function to finish outstanding commands before reconnecting to the target * port. * - It is the responsibility of the caller to ensure that the resources * reallocated by the reconnect() function won't be used while this function * is in progress. One possible strategy is to invoke this function from * the context of the SCSI EH thread only. Another possible strategy is to * lock the rport mutex inside each SCSI LLD callback that can be invoked by * the SCSI EH (the scsi_host_template.eh_*() functions and also the * scsi_host_template.queuecommand() function). 
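 *
 * A sketch of the first strategy (my_target and my_target_from_host() are
 * placeholders for an LLD's own bookkeeping, not part of this API):
 *
 *	static int my_eh_host_reset(struct scsi_cmnd *scmnd)
 *	{
 *		struct my_target *target =
 *			my_target_from_host(scmnd->device->host);
 *
 *		return srp_reconnect_rport(target->rport) == 0 ?
 *			SUCCESS : FAILED;
 *	}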
*/ int srp_reconnect_rport(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); struct srp_internal *i = to_srp_internal(shost->transportt); struct scsi_device *sdev; int res; pr_debug("SCSI host %s\n", dev_name(&shost->shost_gendev)); res = mutex_lock_interruptible(&rport->mutex); if (res) goto out; if (rport->state != SRP_RPORT_FAIL_FAST && rport->state != SRP_RPORT_LOST) /* * sdev state must be SDEV_TRANSPORT_OFFLINE, transition * to SDEV_BLOCK is illegal. Calling scsi_target_unblock() * later is ok though, scsi_internal_device_unblock_nowait() * treats SDEV_TRANSPORT_OFFLINE like SDEV_BLOCK. */ scsi_block_targets(shost, &shost->shost_gendev); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); if (res == 0) { cancel_delayed_work(&rport->fast_io_fail_work); cancel_delayed_work(&rport->dev_loss_work); rport->failed_reconnects = 0; srp_rport_set_state(rport, SRP_RPORT_RUNNING); scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING); /* * If the SCSI error handler has offlined one or more devices, * invoking scsi_target_unblock() won't change the state of * these devices into running so do that explicitly. */ shost_for_each_device(sdev, shost) { mutex_lock(&sdev->state_mutex); if (sdev->sdev_state == SDEV_OFFLINE) sdev->sdev_state = SDEV_RUNNING; mutex_unlock(&sdev->state_mutex); } } else if (rport->state == SRP_RPORT_RUNNING) { /* * srp_reconnect_rport() has been invoked with fast_io_fail * and dev_loss off. Mark the port as failed and start the TL * failure timers if these had not yet been started. */ __rport_fail_io_fast(rport); __srp_start_tl_fail_timers(rport); } else if (rport->state != SRP_RPORT_BLOCKED) { scsi_target_unblock(&shost->shost_gendev, SDEV_TRANSPORT_OFFLINE); } mutex_unlock(&rport->mutex); out: return res; } EXPORT_SYMBOL(srp_reconnect_rport); /** * srp_timed_out() - SRP transport intercept of the SCSI timeout EH * @scmd: SCSI command. * * If a timeout occurs while an rport is in the blocked state, ask the SCSI * EH to continue waiting (SCSI_EH_RESET_TIMER). Otherwise let the SCSI core * handle the timeout (SCSI_EH_NOT_HANDLED). * * Note: This function is called from soft-IRQ context and with the request * queue lock held. */ enum scsi_timeout_action srp_timed_out(struct scsi_cmnd *scmd) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; struct srp_internal *i = to_srp_internal(shost->transportt); struct srp_rport *rport = shost_to_rport(shost); pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); return rport && rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? 
SCSI_EH_RESET_TIMER : SCSI_EH_NOT_HANDLED; } EXPORT_SYMBOL(srp_timed_out); static void srp_rport_release(struct device *dev) { struct srp_rport *rport = dev_to_rport(dev); put_device(dev->parent); kfree(rport); } static int scsi_is_srp_rport(const struct device *dev) { return dev->release == srp_rport_release; } static int srp_rport_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; struct srp_internal *i; if (!scsi_is_srp_rport(dev)) return 0; shost = dev_to_shost(dev->parent); if (!shost->transportt) return 0; if (shost->transportt->host_attrs.ac.class != &srp_host_class.class) return 0; i = to_srp_internal(shost->transportt); return &i->rport_attr_cont.ac == cont; } static int srp_host_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; struct srp_internal *i; if (!scsi_is_host_device(dev)) return 0; shost = dev_to_shost(dev); if (!shost->transportt) return 0; if (shost->transportt->host_attrs.ac.class != &srp_host_class.class) return 0; i = to_srp_internal(shost->transportt); return &i->t.host_attrs.ac == cont; } /** * srp_rport_get() - increment rport reference count * @rport: SRP target port. */ void srp_rport_get(struct srp_rport *rport) { get_device(&rport->dev); } EXPORT_SYMBOL(srp_rport_get); /** * srp_rport_put() - decrement rport reference count * @rport: SRP target port. */ void srp_rport_put(struct srp_rport *rport) { put_device(&rport->dev); } EXPORT_SYMBOL(srp_rport_put); /** * srp_rport_add - add a SRP remote port to the device hierarchy * @shost: scsi host the remote port is connected to. * @ids: The port id for the remote port. * * Publishes a port to the rest of the system. */ struct srp_rport *srp_rport_add(struct Scsi_Host *shost, struct srp_rport_identifiers *ids) { struct srp_rport *rport; struct device *parent = &shost->shost_gendev; struct srp_internal *i = to_srp_internal(shost->transportt); int id, ret; rport = kzalloc(sizeof(*rport), GFP_KERNEL); if (!rport) return ERR_PTR(-ENOMEM); mutex_init(&rport->mutex); device_initialize(&rport->dev); rport->dev.parent = get_device(parent); rport->dev.release = srp_rport_release; memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id)); rport->roles = ids->roles; if (i->f->reconnect) rport->reconnect_delay = i->f->reconnect_delay ? *i->f->reconnect_delay : 10; INIT_DELAYED_WORK(&rport->reconnect_work, srp_reconnect_work); rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ? *i->f->fast_io_fail_tmo : 15; rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60; INIT_DELAYED_WORK(&rport->fast_io_fail_work, rport_fast_io_fail_timedout); INIT_DELAYED_WORK(&rport->dev_loss_work, rport_dev_loss_timedout); id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id); dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id); transport_setup_device(&rport->dev); ret = device_add(&rport->dev); if (ret) { transport_destroy_device(&rport->dev); put_device(&rport->dev); return ERR_PTR(ret); } transport_add_device(&rport->dev); transport_configure_device(&rport->dev); return rport; } EXPORT_SYMBOL_GPL(srp_rport_add); /** * srp_rport_del - remove a SRP remote port * @rport: SRP remote port to remove * * Removes the specified SRP remote port. 
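 *
 * srp_remove_host() below uses this helper to delete every rport hanging
 * off a Scsi_Host. A typical (illustrative) LLD teardown order, following
 * the rules documented with srp_remove_host() and srp_stop_rport_timers(),
 * is:
 *
 *	srp_remove_host(shost);
 *	scsi_remove_host(shost);
 *	srp_stop_rport_timers(rport);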
*/ void srp_rport_del(struct srp_rport *rport) { struct device *dev = &rport->dev; transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); put_device(dev); } EXPORT_SYMBOL_GPL(srp_rport_del); static int do_srp_rport_del(struct device *dev, void *data) { if (scsi_is_srp_rport(dev)) srp_rport_del(dev_to_rport(dev)); return 0; } /** * srp_remove_host - tear down a Scsi_Host's SRP data structures * @shost: Scsi Host that is torn down * * Removes all SRP remote ports for a given Scsi_Host. * Must be called just before scsi_remove_host for SRP HBAs. */ void srp_remove_host(struct Scsi_Host *shost) { device_for_each_child(&shost->shost_gendev, NULL, do_srp_rport_del); } EXPORT_SYMBOL_GPL(srp_remove_host); /** * srp_stop_rport_timers - stop the transport layer recovery timers * @rport: SRP remote port for which to stop the timers. * * Must be called after srp_remove_host() and scsi_remove_host(). The caller * must hold a reference on the rport (rport->dev) and on the SCSI host * (rport->dev.parent). */ void srp_stop_rport_timers(struct srp_rport *rport) { mutex_lock(&rport->mutex); if (rport->state == SRP_RPORT_BLOCKED) __rport_fail_io_fast(rport); srp_rport_set_state(rport, SRP_RPORT_LOST); mutex_unlock(&rport->mutex); cancel_delayed_work_sync(&rport->reconnect_work); cancel_delayed_work_sync(&rport->fast_io_fail_work); cancel_delayed_work_sync(&rport->dev_loss_work); } EXPORT_SYMBOL_GPL(srp_stop_rport_timers); /** * srp_attach_transport - instantiate SRP transport template * @ft: SRP transport class function template */ struct scsi_transport_template * srp_attach_transport(struct srp_function_template *ft) { int count; struct srp_internal *i; i = kzalloc(sizeof(*i), GFP_KERNEL); if (!i) return NULL; i->t.host_size = sizeof(struct srp_host_attrs); i->t.host_attrs.ac.attrs = &i->host_attrs[0]; i->t.host_attrs.ac.class = &srp_host_class.class; i->t.host_attrs.ac.match = srp_host_match; i->host_attrs[0] = NULL; transport_container_register(&i->t.host_attrs); i->rport_attr_cont.ac.attrs = &i->rport_attrs[0]; i->rport_attr_cont.ac.class = &srp_rport_class.class; i->rport_attr_cont.ac.match = srp_rport_match; count = 0; i->rport_attrs[count++] = &dev_attr_port_id; i->rport_attrs[count++] = &dev_attr_roles; if (ft->has_rport_state) { i->rport_attrs[count++] = &dev_attr_state; i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo; i->rport_attrs[count++] = &dev_attr_dev_loss_tmo; } if (ft->reconnect) { i->rport_attrs[count++] = &dev_attr_reconnect_delay; i->rport_attrs[count++] = &dev_attr_failed_reconnects; } if (ft->rport_delete) i->rport_attrs[count++] = &dev_attr_delete; i->rport_attrs[count++] = NULL; BUG_ON(count > ARRAY_SIZE(i->rport_attrs)); transport_container_register(&i->rport_attr_cont); i->f = ft; return &i->t; } EXPORT_SYMBOL_GPL(srp_attach_transport); /** * srp_release_transport - release SRP transport template instance * @t: transport template instance */ void srp_release_transport(struct scsi_transport_template *t) { struct srp_internal *i = to_srp_internal(t); transport_container_unregister(&i->t.host_attrs); transport_container_unregister(&i->rport_attr_cont); kfree(i); } EXPORT_SYMBOL_GPL(srp_release_transport); static __init int srp_transport_init(void) { int ret; ret = transport_class_register(&srp_host_class); if (ret) return ret; ret = transport_class_register(&srp_rport_class); if (ret) goto unregister_host_class; return 0; unregister_host_class: transport_class_unregister(&srp_host_class); return ret; } static void __exit srp_transport_exit(void) { 
transport_class_unregister(&srp_host_class); transport_class_unregister(&srp_rport_class); } MODULE_AUTHOR("FUJITA Tomonori"); MODULE_DESCRIPTION("SRP Transport Attributes"); MODULE_LICENSE("GPL"); module_init(srp_transport_init); module_exit(srp_transport_exit);
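/*
 * Editor's note: an illustrative usage sketch, not part of this file. It
 * shows how an SRP initiator driver might typically consume the transport
 * class defined above. The example_* identifiers are hypothetical;
 * srp_attach_transport(), srp_rport_add(), srp_remove_host(),
 * srp_stop_rport_timers() and srp_release_transport() are the real entry
 * points implemented in this file.
 *
 *	static int example_reconnect(struct srp_rport *rport)
 *	{
 *		// Re-establish the transport connection; returning 0 moves
 *		// the rport back to SRP_RPORT_RUNNING (see
 *		// srp_reconnect_rport() above).
 *		return 0;
 *	}
 *
 *	static struct srp_function_template example_ft = {
 *		.has_rport_state	= true,
 *		.reset_timer_if_blocked	= true,
 *		.reconnect		= example_reconnect,
 *	};
 *
 *	// Module init: instantiate the template and attach it to the host:
 *	//	tt = srp_attach_transport(&example_ft);
 *	//	shost->transportt = tt;
 *
 *	// After scsi_add_host(), publish each discovered remote port:
 *	//	struct srp_rport_identifiers ids = {
 *	//		.roles = SRP_RPORT_ROLE_TARGET,
 *	//	};
 *	//	memcpy(ids.port_id, remote_port_id, sizeof(ids.port_id));
 *	//	rport = srp_rport_add(shost, &ids);
 *
 *	// Teardown, in the order required by the kernel-doc above:
 *	//	srp_remove_host(shost);
 *	//	scsi_remove_host(shost);
 *	//	srp_stop_rport_timers(rport);
 *	//	srp_release_transport(tt);
 */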
// SPDX-License-Identifier: GPL-2.0 /* * Generic USB GNSS receiver driver * * Copyright (C) 2021 Johan Hovold <johan@kernel.org> */ #include <linux/errno.h> #include <linux/gnss.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #define GNSS_USB_READ_BUF_LEN 512 #define GNSS_USB_WRITE_TIMEOUT 1000 static const struct usb_device_id gnss_usb_id_table[] = { { USB_DEVICE(0x1199, 0xb000) }, /* Sierra Wireless XM1210 */ { } }; MODULE_DEVICE_TABLE(usb, gnss_usb_id_table); struct gnss_usb { struct usb_device *udev; struct usb_interface *intf; struct gnss_device *gdev; struct urb *read_urb; unsigned int write_pipe; }; static void gnss_usb_rx_complete(struct urb *urb) { struct gnss_usb *gusb = urb->context; struct gnss_device *gdev = gusb->gdev; int status = urb->status; int len; int ret; switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&gdev->dev, "urb stopped: %d\n", status); return; case -EPIPE: dev_err(&gdev->dev, "urb stopped: %d\n", status); return; default: dev_dbg(&gdev->dev, "nonzero urb status: %d\n", status); goto resubmit; } len = urb->actual_length; if (len == 0) goto resubmit; ret = gnss_insert_raw(gdev, urb->transfer_buffer, len); if (ret < len) dev_dbg(&gdev->dev, "dropped %d bytes\n", len - ret); resubmit: ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret && ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to resubmit urb: %d\n", ret); } static int gnss_usb_open(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); int ret; ret = usb_submit_urb(gusb->read_urb, GFP_KERNEL); if (ret) { if (ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to submit urb: %d\n", ret); return ret; } return 0; } static void gnss_usb_close(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); usb_kill_urb(gusb->read_urb); } static int gnss_usb_write_raw(struct gnss_device *gdev, const unsigned char *buf, size_t count) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); void *tbuf; int ret; tbuf = kmemdup(buf, count, GFP_KERNEL); if (!tbuf) return -ENOMEM; ret = usb_bulk_msg(gusb->udev, gusb->write_pipe, tbuf, count, NULL, GNSS_USB_WRITE_TIMEOUT); kfree(tbuf); if (ret) return ret; return count; } static const struct gnss_operations gnss_usb_gnss_ops = { .open = gnss_usb_open, .close = gnss_usb_close, .write_raw = gnss_usb_write_raw, }; static int gnss_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *in, *out; struct gnss_device *gdev; struct gnss_usb *gusb; struct urb *urb; size_t buf_len; void *buf; int ret; ret = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL,
NULL); if (ret) return ret; gusb = kzalloc(sizeof(*gusb), GFP_KERNEL); if (!gusb) return -ENOMEM; gdev = gnss_allocate_device(&intf->dev); if (!gdev) { ret = -ENOMEM; goto err_free_gusb; } gdev->ops = &gnss_usb_gnss_ops; gdev->type = GNSS_TYPE_NMEA; gnss_set_drvdata(gdev, gusb); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { ret = -ENOMEM; goto err_put_gdev; } buf_len = max(usb_endpoint_maxp(in), GNSS_USB_READ_BUF_LEN); buf = kzalloc(buf_len, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto err_free_urb; } usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, usb_endpoint_num(in)), buf, buf_len, gnss_usb_rx_complete, gusb); gusb->intf = intf; gusb->udev = udev; gusb->gdev = gdev; gusb->read_urb = urb; gusb->write_pipe = usb_sndbulkpipe(udev, usb_endpoint_num(out)); ret = gnss_register_device(gdev); if (ret) goto err_free_buf; usb_set_intfdata(intf, gusb); return 0; err_free_buf: kfree(buf); err_free_urb: usb_free_urb(urb); err_put_gdev: gnss_put_device(gdev); err_free_gusb: kfree(gusb); return ret; } static void gnss_usb_disconnect(struct usb_interface *intf) { struct gnss_usb *gusb = usb_get_intfdata(intf); gnss_deregister_device(gusb->gdev); kfree(gusb->read_urb->transfer_buffer); usb_free_urb(gusb->read_urb); gnss_put_device(gusb->gdev); kfree(gusb); } static struct usb_driver gnss_usb_driver = { .name = "gnss-usb", .probe = gnss_usb_probe, .disconnect = gnss_usb_disconnect, .id_table = gnss_usb_id_table, }; module_usb_driver(gnss_usb_driver); MODULE_AUTHOR("Johan Hovold <johan@kernel.org>"); MODULE_DESCRIPTION("Generic USB GNSS receiver driver"); MODULE_LICENSE("GPL v2");
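/*
 * Editor's note: an illustrative usage sketch, not part of this driver.
 * The GNSS core exposes the receiver as a character device; the node name
 * /dev/gnss0 below is an assumption (the minor number depends on
 * registration order). The raw NMEA stream that gnss_usb_rx_complete()
 * queues via gnss_insert_raw() can then be consumed from userspace with a
 * plain read() loop:
 *
 *	int fd = open("/dev/gnss0", O_RDONLY);
 *	char buf[512];
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		fwrite(buf, 1, n, stdout);	// raw NMEA sentences
 */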
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/types.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/tty.h> #include <linux/fcntl.h> #include <linux/uaccess.h> #include "tty.h" static int is_ignored(int sig) { return (sigismember(&current->blocked, sig) || current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); } /** * __tty_check_change - check for POSIX terminal changes * @tty: tty to check * @sig: signal to send * * If we try to write to, or set the state of, a terminal and we're * not in the foreground, send a SIGTTOU. If the signal is blocked or * ignored, go ahead and perform the operation.
(POSIX 7.2) * * Locking: ctrl.lock */ int __tty_check_change(struct tty_struct *tty, int sig) { unsigned long flags; struct pid *pgrp, *tty_pgrp; int ret = 0; if (current->signal->tty != tty) return 0; rcu_read_lock(); pgrp = task_pgrp(current); spin_lock_irqsave(&tty->ctrl.lock, flags); tty_pgrp = tty->ctrl.pgrp; spin_unlock_irqrestore(&tty->ctrl.lock, flags); if (tty_pgrp && pgrp != tty_pgrp) { if (is_ignored(sig)) { if (sig == SIGTTIN) ret = -EIO; } else if (is_current_pgrp_orphaned()) ret = -EIO; else { kill_pgrp(pgrp, sig, 1); set_thread_flag(TIF_SIGPENDING); ret = -ERESTARTSYS; } } rcu_read_unlock(); if (!tty_pgrp) tty_warn(tty, "sig=%d, tty->pgrp == NULL!\n", sig); return ret; } int tty_check_change(struct tty_struct *tty) { return __tty_check_change(tty, SIGTTOU); } EXPORT_SYMBOL(tty_check_change); void proc_clear_tty(struct task_struct *p) { unsigned long flags; struct tty_struct *tty; spin_lock_irqsave(&p->sighand->siglock, flags); tty = p->signal->tty; p->signal->tty = NULL; spin_unlock_irqrestore(&p->sighand->siglock, flags); tty_kref_put(tty); } /** * __proc_set_tty - set the controlling terminal * @tty: tty structure * * Only callable by the session leader and only if it does not already have * a controlling terminal. * * Caller must hold: tty_lock() * a readlock on tasklist_lock * sighand lock */ static void __proc_set_tty(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->ctrl.lock, flags); /* * The session and fg pgrp references will be non-NULL if * tiocsctty() is stealing the controlling tty */ put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.pgrp = get_pid(task_pgrp(current)); tty->ctrl.session = get_pid(task_session(current)); spin_unlock_irqrestore(&tty->ctrl.lock, flags); if (current->signal->tty) { tty_debug(tty, "current tty %s not NULL!!\n", current->signal->tty->name); tty_kref_put(current->signal->tty); } put_pid(current->signal->tty_old_pgrp); current->signal->tty = tty_kref_get(tty); current->signal->tty_old_pgrp = NULL; } static void proc_set_tty(struct tty_struct *tty) { spin_lock_irq(&current->sighand->siglock); __proc_set_tty(tty); spin_unlock_irq(&current->sighand->siglock); } /* * Called by tty_open() to set the controlling tty if applicable. */ void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty) { read_lock(&tasklist_lock); spin_lock_irq(&current->sighand->siglock); if (current->signal->leader && !current->signal->tty && tty->ctrl.session == NULL) { /* * Don't let a process that only has write access to the tty * obtain the privileges associated with having a tty as * controlling terminal (being able to reopen it with full * access through /dev/tty, being able to perform pushback). * Many distributions set the group of all ttys to "tty" and * grant write-only access to all terminals for setgid tty * binaries, which should not imply full privileges on all ttys. * * This could theoretically break old code that performs open() * on a write-only file descriptor. In that case, it might be * necessary to also permit this if * inode_permission(inode, MAY_READ) == 0. */ if (filp->f_mode & FMODE_READ) __proc_set_tty(tty); } spin_unlock_irq(&current->sighand->siglock); read_unlock(&tasklist_lock); } struct tty_struct *get_current_tty(void) { struct tty_struct *tty; unsigned long flags; spin_lock_irqsave(&current->sighand->siglock, flags); tty = tty_kref_get(current->signal->tty); spin_unlock_irqrestore(&current->sighand->siglock, flags); return tty; } EXPORT_SYMBOL_GPL(get_current_tty); /* * Called from tty_release().
*/ void session_clear_tty(struct pid *session) { struct task_struct *p; do_each_pid_task(session, PIDTYPE_SID, p) { proc_clear_tty(p); } while_each_pid_task(session, PIDTYPE_SID, p); } /** * tty_signal_session_leader - sends SIGHUP to session leader * @tty: controlling tty * @exit_session: if non-zero, signal all foreground group processes * * Send SIGHUP and SIGCONT to the session leader and its process group. * Optionally, signal all processes in the foreground process group. * * Returns the number of processes in the session with this tty * as their controlling terminal. This value is used to drop * tty references for those processes. */ int tty_signal_session_leader(struct tty_struct *tty, int exit_session) { struct task_struct *p; int refs = 0; struct pid *tty_pgrp = NULL; read_lock(&tasklist_lock); if (tty->ctrl.session) { do_each_pid_task(tty->ctrl.session, PIDTYPE_SID, p) { spin_lock_irq(&p->sighand->siglock); if (p->signal->tty == tty) { p->signal->tty = NULL; /* * We defer the dereferences outside of * the tasklist lock. */ refs++; } if (!p->signal->leader) { spin_unlock_irq(&p->sighand->siglock); continue; } send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); put_pid(p->signal->tty_old_pgrp); /* A noop */ spin_lock(&tty->ctrl.lock); tty_pgrp = get_pid(tty->ctrl.pgrp); if (tty->ctrl.pgrp) p->signal->tty_old_pgrp = get_pid(tty->ctrl.pgrp); spin_unlock(&tty->ctrl.lock); spin_unlock_irq(&p->sighand->siglock); } while_each_pid_task(tty->ctrl.session, PIDTYPE_SID, p); } read_unlock(&tasklist_lock); if (tty_pgrp) { if (exit_session) kill_pgrp(tty_pgrp, SIGHUP, exit_session); put_pid(tty_pgrp); } return refs; } /** * disassociate_ctty - disconnect controlling tty * @on_exit: true if exiting so need to "hang up" the session * * This function is typically called only by the session leader, when * it wants to disassociate itself from its controlling tty. * * It performs the following functions: * (1) Sends a SIGHUP and SIGCONT to the foreground process group * (2) Clears the tty from being the controlling tty of the session * (3) Clears the controlling tty for all processes in the * session group. * * The argument on_exit is set to 1 if called when a process is * exiting; it is 0 if called by the ioctl TIOCNOTTY. * * Locking: * BTM is taken for hysterical raisons, and held when * called from no_tty().
* tty_mutex is taken to protect tty * ->siglock is taken to protect ->signal/->sighand * tasklist_lock is taken to walk process list for sessions * ->siglock is taken to protect ->signal/->sighand */ void disassociate_ctty(int on_exit) { struct tty_struct *tty; if (!current->signal->leader) return; tty = get_current_tty(); if (tty) { if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) { tty_vhangup_session(tty); } else { struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, SIGHUP, on_exit); if (!on_exit) kill_pgrp(tty_pgrp, SIGCONT, on_exit); put_pid(tty_pgrp); } } tty_kref_put(tty); } else if (on_exit) { struct pid *old_pgrp; spin_lock_irq(&current->sighand->siglock); old_pgrp = current->signal->tty_old_pgrp; current->signal->tty_old_pgrp = NULL; spin_unlock_irq(&current->sighand->siglock); if (old_pgrp) { kill_pgrp(old_pgrp, SIGHUP, on_exit); kill_pgrp(old_pgrp, SIGCONT, on_exit); put_pid(old_pgrp); } return; } tty = get_current_tty(); if (tty) { unsigned long flags; tty_lock(tty); spin_lock_irqsave(&tty->ctrl.lock, flags); put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; spin_unlock_irqrestore(&tty->ctrl.lock, flags); tty_unlock(tty); tty_kref_put(tty); } /* If tty->ctrl.pgrp is not NULL, it may be assigned to * current->signal->tty_old_pgrp in a race condition, and * cause a pid memleak. Release current->signal->tty_old_pgrp * after tty->ctrl.pgrp is set to NULL. */ spin_lock_irq(&current->sighand->siglock); put_pid(current->signal->tty_old_pgrp); current->signal->tty_old_pgrp = NULL; spin_unlock_irq(&current->sighand->siglock); /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); session_clear_tty(task_session(current)); read_unlock(&tasklist_lock); } /* * * no_tty - Ensure the current process does not have a controlling tty */ void no_tty(void) { /* * FIXME: Review locking here. The tty_lock never covered any race * between a new association and proc_clear_tty but possibly we need * to protect against this anyway. */ struct task_struct *tsk = current; disassociate_ctty(0); proc_clear_tty(tsk); } /** * tiocsctty - set controlling tty * @tty: tty structure * @file: file structure used to check permissions * @arg: user argument * * This ioctl is used to manage job control. It permits a session * leader to set this tty as the controlling tty for the session. * * Locking: * Takes tty_lock() to serialize proc_set_tty() for this tty * Takes tasklist_lock internally to walk sessions * Takes ->siglock() when updating signal->tty */ static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) { int ret = 0; tty_lock(tty); read_lock(&tasklist_lock); if (current->signal->leader && task_session(current) == tty->ctrl.session) goto unlock; /* * The process must be a session leader and * not have a controlling tty already. */ if (!current->signal->leader || current->signal->tty) { ret = -EPERM; goto unlock; } if (tty->ctrl.session) { /* * This tty is already the controlling * tty for another session group! */ if (arg == 1 && capable(CAP_SYS_ADMIN)) { /* * Steal it away */ session_clear_tty(tty->ctrl.session); } else { ret = -EPERM; goto unlock; } } /* See the comment in tty_open_proc_set_tty().
*/ if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto unlock; } proc_set_tty(tty); unlock: read_unlock(&tasklist_lock); tty_unlock(tty); return ret; } /** * tty_get_pgrp - return a ref counted pgrp pid * @tty: tty to read * * Returns a refcounted instance of the pid struct for the process * group controlling the tty. */ struct pid *tty_get_pgrp(struct tty_struct *tty) { unsigned long flags; struct pid *pgrp; spin_lock_irqsave(&tty->ctrl.lock, flags); pgrp = get_pid(tty->ctrl.pgrp); spin_unlock_irqrestore(&tty->ctrl.lock, flags); return pgrp; } EXPORT_SYMBOL_GPL(tty_get_pgrp); /* * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. I dunno - gdb doesn't work correctly * without this... * * The caller must hold rcu lock or the tasklist lock. */ static struct pid *session_of_pgrp(struct pid *pgrp) { struct task_struct *p; struct pid *sid = NULL; p = pid_task(pgrp, PIDTYPE_PGID); if (p == NULL) p = pid_task(pgrp, PIDTYPE_PID); if (p != NULL) sid = task_session(p); return sid; } /** * tiocgpgrp - get process group * @tty: tty passed by user * @real_tty: tty side of the tty passed by the user if a pty else the tty * @p: returned pid * * Obtain the process group of the tty. If there is no process group * return an error. * * Locking: none. Reference to current->signal->tty is safe. */ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { struct pid *pid; int ret; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; pid = tty_get_pgrp(real_tty); ret = put_user(pid_vnr(pid), p); put_pid(pid); return ret; } /** * tiocspgrp - attempt to set process group * @tty: tty passed by user * @real_tty: tty side device matching tty passed by user * @p: pid pointer * * Set the process group of the tty to the session passed. Only * permitted where the tty session is our session. * * Locking: RCU, ctrl lock */ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { struct pid *pgrp; pid_t pgrp_nr; int retval = tty_check_change(real_tty); if (retval == -EIO) return -ENOTTY; if (retval) return retval; if (get_user(pgrp_nr, p)) return -EFAULT; if (pgrp_nr < 0) return -EINVAL; spin_lock_irq(&real_tty->ctrl.lock); if (!current->signal->tty || (current->signal->tty != real_tty) || (real_tty->ctrl.session != task_session(current))) { retval = -ENOTTY; goto out_unlock_ctrl; } rcu_read_lock(); pgrp = find_vpid(pgrp_nr); retval = -ESRCH; if (!pgrp) goto out_unlock; retval = -EPERM; if (session_of_pgrp(pgrp) != task_session(current)) goto out_unlock; retval = 0; put_pid(real_tty->ctrl.pgrp); real_tty->ctrl.pgrp = get_pid(pgrp); out_unlock: rcu_read_unlock(); out_unlock_ctrl: spin_unlock_irq(&real_tty->ctrl.lock); return retval; } /** * tiocgsid - get session id * @tty: tty passed by user * @real_tty: tty side of the tty passed by the user if a pty else the tty * @p: pointer to returned session id * * Obtain the session id of the tty. If there is no session * return an error. */ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { unsigned long flags; pid_t sid; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. 
*/ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; spin_lock_irqsave(&real_tty->ctrl.lock, flags); if (!real_tty->ctrl.session) goto err; sid = pid_vnr(real_tty->ctrl.session); spin_unlock_irqrestore(&real_tty->ctrl.lock, flags); return put_user(sid, p); err: spin_unlock_irqrestore(&real_tty->ctrl.lock, flags); return -ENOTTY; } /* * Called from tty_ioctl(). If tty is a pty then real_tty is the slave side, * if not then tty == real_tty. */ long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, struct file *file, unsigned int cmd, unsigned long arg) { void __user *p = (void __user *)arg; switch (cmd) { case TIOCNOTTY: if (current->signal->tty != tty) return -ENOTTY; no_tty(); return 0; case TIOCSCTTY: return tiocsctty(real_tty, file, arg); case TIOCGPGRP: return tiocgpgrp(tty, real_tty, p); case TIOCSPGRP: return tiocspgrp(tty, real_tty, p); case TIOCGSID: return tiocgsid(tty, real_tty, p); } return -ENOIOCTLCMD; }
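/*
 * Editor's note: an illustrative userspace sketch, not part of this file.
 * The ioctls dispatched by tty_jobctrl_ioctl() are the kernel side of
 * POSIX job control. A new session leader acquiring a controlling tty and
 * handing the foreground over to a child process group might do roughly
 * the following (error handling omitted; /dev/ttyS0 and child_pgid are
 * hypothetical):
 *
 *	setsid();				// become session leader, no ctty
 *	int fd = open("/dev/ttyS0", O_RDWR);
 *	ioctl(fd, TIOCSCTTY, 0);		// -> tiocsctty(); arg == 1 with
 *						//    CAP_SYS_ADMIN steals the tty
 *
 *	pid_t pgrp = child_pgid;
 *	ioctl(fd, TIOCSPGRP, &pgrp);		// -> tiocspgrp(), set foreground pgrp
 *	ioctl(fd, TIOCGPGRP, &pgrp);		// -> tiocgpgrp(), read it back
 *
 *	ioctl(fd, TIOCNOTTY, 0);		// -> no_tty(), give up the ctty
 */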
// SPDX-License-Identifier: GPL-2.0-only /* * Simple NUMA memory policy for the Linux kernel. * * Copyright 2003,2004 Andi Kleen, SuSE Labs. * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc. * * NUMA policy allows the user to give hints in which node(s) memory should * be allocated. * * Support six policies per VMA and per process: * * The VMA policy has priority over the process policy for a page fault. * * interleave Allocate memory interleaved over a set of nodes, * with normal fallback if it fails. * For VMA based allocations this interleaves based on the * offset into the backing object or offset into the mapping * for anonymous memory. For process policy a process counter * is used. * * weighted interleave * Allocate memory interleaved over a set of nodes based on * a set of weights (per-node), with normal fallback if it * fails. Otherwise operates the same as interleave. * Example: nodeset(0,1) & weights (2,1) - 2 pages allocated * on node 0 for every 1 page allocated on node 1. * * bind Only allocate memory on a specific set of nodes, * no fallback. * FIXME: memory is allocated starting with the first node * to the last. It would be better if bind would truly restrict * the allocation to memory nodes instead * * preferred Try a specific node first before normal fallback. * As a special case NUMA_NO_NODE here means do the allocation * on the local CPU. This is normally identical to default, * but useful to set in a VMA when you have a non default * process policy.
* * preferred many Try a set of nodes first before normal fallback. This is * similar to preferred without the special case. * * default Allocate on the local node first, or when on a VMA * use the process policy. This is what Linux always did * in a NUMA aware kernel and still does by, ahem, default. * * The process policy is applied for most non-interrupt memory allocations * in that process' context. Interrupts ignore the policies and always * try to allocate on the local CPU. The VMA policy is only applied for memory * allocations for a VMA in the VM. * * Currently there are a few corner cases in swapping where the policy * is not applied, but the majority should be handled. When process policy * is used it is not remembered over swap outs/swap ins. * * Only the highest zone in the zone hierarchy gets policied. Allocations * requesting a lower zone just use default policy. This implies that * on systems with highmem kernel lowmem allocations don't get policied. * Same with GFP_DMA allocations. * * For shmem/tmpfs shared memory the policy is shared between * all users and remembered even when nobody has memory mapped. */ /* Notebook: fix mmap readahead to honour policy and enable policy for any page cache object statistics for bigpages global policy for page cache? currently it uses process policy. Requires first item above. handle mremap for shared memory (currently ignored for the policy) grows down? make bind policy root only? It can trigger oom much faster and the kernel is not always grateful with that. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mempolicy.h> #include <linux/pagewalk.h> #include <linux/highmem.h> #include <linux/hugetlb.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/sysctl.h> #include <linux/sched/task.h> #include <linux/nodemask.h> #include <linux/cpuset.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compat.h> #include <linux/ptrace.h> #include <linux/swap.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/memory-tiers.h> #include <linux/migrate.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/mm_inline.h> #include <linux/mmu_notifier.h> #include <linux/printk.h> #include <linux/leafops.h> #include <linux/gcd.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <linux/uaccess.h> #include <linux/memory.h> #include "internal.h" /* Internal flags */ #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ #define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */ static struct kmem_cache *policy_cache; static struct kmem_cache *sn_cache; /* Highest zone. A specific allocation for a zone below that is not policied.
*/ enum zone_type policy_zone = 0; /* * run-time system-wide default policy => local allocation */ static struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .mode = MPOL_LOCAL, }; static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /* * weightiness balances the tradeoff between small weights (cycles through nodes * faster, more fair/even distribution) and large weights (smaller errors * between actual bandwidth ratios and weight ratios). 32 is a number that has * been found to perform at a reasonable compromise between the two goals. */ static const int weightiness = 32; /* * A null weighted_interleave_state is interpreted as having .mode="auto", * and .iw_table is interpreted as an array of 1s with length nr_node_ids. */ struct weighted_interleave_state { bool mode_auto; u8 iw_table[]; }; static struct weighted_interleave_state __rcu *wi_state; static unsigned int *node_bw_table; /* * wi_state_lock protects both wi_state and node_bw_table. * node_bw_table is only used by writers to update wi_state. */ static DEFINE_MUTEX(wi_state_lock); static u8 get_il_weight(int node) { struct weighted_interleave_state *state; u8 weight = 1; rcu_read_lock(); state = rcu_dereference(wi_state); if (state) weight = state->iw_table[node]; rcu_read_unlock(); return weight; } /* * Convert bandwidth values into weighted interleave weights. * Call with wi_state_lock. */ static void reduce_interleave_weights(unsigned int *bw, u8 *new_iw) { u64 sum_bw = 0; unsigned int cast_sum_bw, scaling_factor = 1, iw_gcd = 0; int nid; for_each_node_state(nid, N_MEMORY) sum_bw += bw[nid]; /* Scale bandwidths to whole numbers in the range [1, weightiness] */ for_each_node_state(nid, N_MEMORY) { /* * Try not to perform 64-bit division. * If sum_bw < scaling_factor, then sum_bw < U32_MAX. * If sum_bw > scaling_factor, then round the weight up to 1. */ scaling_factor = weightiness * bw[nid]; if (bw[nid] && sum_bw < scaling_factor) { cast_sum_bw = (unsigned int)sum_bw; new_iw[nid] = scaling_factor / cast_sum_bw; } else { new_iw[nid] = 1; } if (!iw_gcd) iw_gcd = new_iw[nid]; iw_gcd = gcd(iw_gcd, new_iw[nid]); } /* 1:2 is strictly better than 16:32. Reduce by the weights' GCD. */ for_each_node_state(nid, N_MEMORY) new_iw[nid] /= iw_gcd; } int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords) { struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL; unsigned int *old_bw, *new_bw; unsigned int bw_val; int i; bw_val = min(coords->read_bandwidth, coords->write_bandwidth); new_bw = kcalloc(nr_node_ids, sizeof(unsigned int), GFP_KERNEL); if (!new_bw) return -ENOMEM; new_wi_state = kmalloc(struct_size(new_wi_state, iw_table, nr_node_ids), GFP_KERNEL); if (!new_wi_state) { kfree(new_bw); return -ENOMEM; } new_wi_state->mode_auto = true; for (i = 0; i < nr_node_ids; i++) new_wi_state->iw_table[i] = 1; /* * Update bandwidth info, even in manual mode. That way, when switching * to auto mode in the future, iw_table can be overwritten using * accurate bw data. 
*/ mutex_lock(&wi_state_lock); old_bw = node_bw_table; if (old_bw) memcpy(new_bw, old_bw, nr_node_ids * sizeof(*old_bw)); new_bw[node] = bw_val; node_bw_table = new_bw; old_wi_state = rcu_dereference_protected(wi_state, lockdep_is_held(&wi_state_lock)); if (old_wi_state && !old_wi_state->mode_auto) { /* Manual mode; skip reducing weights and updating wi_state */ mutex_unlock(&wi_state_lock); kfree(new_wi_state); goto out; } /* NULL wi_state assumes auto=true; reduce weights and update wi_state */ reduce_interleave_weights(new_bw, new_wi_state->iw_table); rcu_assign_pointer(wi_state, new_wi_state); mutex_unlock(&wi_state_lock); if (old_wi_state) { synchronize_rcu(); kfree(old_wi_state); } out: kfree(old_bw); return 0; } /** * numa_nearest_node - Find nearest node by state * @node: Node id to start the search * @state: State to filter the search * * Lookup the closest node by distance if @node is not in state. * * Return: this @node if it is in state, otherwise the closest node by distance */ int numa_nearest_node(int node, unsigned int state) { int min_dist = INT_MAX, dist, n, min_node; if (state >= NR_NODE_STATES) return -EINVAL; if (node == NUMA_NO_NODE || node_state(node, state)) return node; min_node = node; for_each_node_state(n, state) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; min_node = n; } } return min_node; } EXPORT_SYMBOL_GPL(numa_nearest_node); /** * nearest_node_nodemask - Find the node in @mask at the nearest distance * from @node. * * @node: a valid node ID to start the search from. * @mask: a pointer to a nodemask representing the allowed nodes. * * This function iterates over all nodes in @mask and calculates the * distance from the starting @node, then it returns the node ID that is * the closest to @node, or MAX_NUMNODES if no node is found. * * Note that @node must be a valid node ID usable with node_distance(), * providing an invalid node ID (e.g., NUMA_NO_NODE) may result in crashes * or unexpected behavior.
*/ int nearest_node_nodemask(int node, nodemask_t *mask) { int dist, n, min_dist = INT_MAX, min_node = MAX_NUMNODES; for_each_node_mask(n, *mask) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; min_node = n; } } return min_node; } EXPORT_SYMBOL_GPL(nearest_node_nodemask); struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; int node; if (pol) return pol; node = numa_node_id(); if (node != NUMA_NO_NODE) { pol = &preferred_node_policy[node]; /* preferred_node_policy is not initialised early in boot */ if (pol->mode) return pol; } return &default_policy; } EXPORT_SYMBOL_FOR_MODULES(get_task_policy, "kvm"); static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes); } mpol_ops[MPOL_MAX]; static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; } static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, const nodemask_t *rel) { nodemask_t tmp; nodes_fold(tmp, *orig, nodes_weight(*rel)); nodes_onto(*ret, tmp, *rel); } static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; pol->nodes = *nodes; return 0; } static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; nodes_clear(pol->nodes); node_set(first_node(*nodes), pol->nodes); return 0; } /* * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if * any, for the new policy. mpol_new() has already validated the nodes * parameter with respect to the policy mode and flags. * * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. May also be called holding the mmap_lock for write. */ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes, struct nodemask_scratch *nsc) { int ret; /* * Default (pol==NULL) and local memory policies are not subject * to any remapping. They also do not need any special constructor. */ if (!pol || pol->mode == MPOL_LOCAL) return 0; /* Check N_MEMORY */ nodes_and(nsc->mask1, cpuset_current_mems_allowed, node_states[N_MEMORY]); VM_BUG_ON(!nodes); if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); return ret; } /* * This function just creates a new policy, does some checks and simple * initialization. You must invoke mpol_set_nodemask() to set nodes. */ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *policy; if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; } VM_BUG_ON(!nodes); /* * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation). * All other modes require a valid pointer to a non-empty nodemask.
	 */
	if (mode == MPOL_PREFERRED) {
		if (nodes_empty(*nodes)) {
			if (((flags & MPOL_F_STATIC_NODES) ||
			     (flags & MPOL_F_RELATIVE_NODES)))
				return ERR_PTR(-EINVAL);

			mode = MPOL_LOCAL;
		}
	} else if (mode == MPOL_LOCAL) {
		if (!nodes_empty(*nodes) ||
		    (flags & MPOL_F_STATIC_NODES) ||
		    (flags & MPOL_F_RELATIVE_NODES))
			return ERR_PTR(-EINVAL);
	} else if (nodes_empty(*nodes))
		return ERR_PTR(-EINVAL);

	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
	if (!policy)
		return ERR_PTR(-ENOMEM);
	atomic_set(&policy->refcnt, 1);
	policy->mode = mode;
	policy->flags = flags;
	policy->home_node = NUMA_NO_NODE;

	return policy;
}

/* Slow path of a mpol destructor. */
void __mpol_put(struct mempolicy *pol)
{
	if (!atomic_dec_and_test(&pol->refcnt))
		return;
	kmem_cache_free(policy_cache, pol);
}
EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");

static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
{
}

static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
{
	nodemask_t tmp;

	if (pol->flags & MPOL_F_STATIC_NODES)
		nodes_and(tmp, pol->w.user_nodemask, *nodes);
	else if (pol->flags & MPOL_F_RELATIVE_NODES)
		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
	else {
		nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed,
								*nodes);
		pol->w.cpuset_mems_allowed = *nodes;
	}

	if (nodes_empty(tmp))
		tmp = *nodes;

	pol->nodes = tmp;
}

static void mpol_rebind_preferred(struct mempolicy *pol,
						const nodemask_t *nodes)
{
	pol->w.cpuset_mems_allowed = *nodes;
}

/*
 * mpol_rebind_policy - Migrate a policy to a different set of nodes
 *
 * Per-vma policies are protected by mmap_lock. Allocations using per-task
 * policies are protected by task->mems_allowed_seq to prevent a premature
 * OOM/allocation failure due to parallel nodemask modification.
 */
static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
	if (!pol || pol->mode == MPOL_LOCAL)
		return;
	if (!mpol_store_user_nodemask(pol) &&
	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
		return;

	mpol_ops[pol->mode].rebind(pol, newmask);
}

/*
 * Wrapper for mpol_rebind_policy() that just requires a task
 * pointer, and updates the task's mempolicy.
 *
 * Called with task's alloc_lock held.
 */
void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
{
	mpol_rebind_policy(tsk->mempolicy, new);
}

/*
 * Rebind each vma in mm to new nodemask.
 *
 * Call holding a reference to mm. Takes mm->mmap_lock during call.
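 *
 * A hedged usage sketch (mirrors what cpuset rebinding is expected to do;
 * identifiers are illustrative):
 *
 *	struct mm_struct *mm = get_task_mm(tsk);
 *
 *	if (mm) {
 *		mpol_rebind_mm(mm, &new_mems);
 *		mmput(mm);
 *	}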
 */
void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	for_each_vma(vmi, vma) {
		vma_start_write(vma);
		mpol_rebind_policy(vma->vm_policy, new);
	}
	mmap_write_unlock(mm);
}

static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
	[MPOL_DEFAULT] = {
		.rebind = mpol_rebind_default,
	},
	[MPOL_INTERLEAVE] = {
		.create = mpol_new_nodemask,
		.rebind = mpol_rebind_nodemask,
	},
	[MPOL_PREFERRED] = {
		.create = mpol_new_preferred,
		.rebind = mpol_rebind_preferred,
	},
	[MPOL_BIND] = {
		.create = mpol_new_nodemask,
		.rebind = mpol_rebind_nodemask,
	},
	[MPOL_LOCAL] = {
		.rebind = mpol_rebind_default,
	},
	[MPOL_PREFERRED_MANY] = {
		.create = mpol_new_nodemask,
		.rebind = mpol_rebind_preferred,
	},
	[MPOL_WEIGHTED_INTERLEAVE] = {
		.create = mpol_new_nodemask,
		.rebind = mpol_rebind_nodemask,
	},
};

static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
				unsigned long flags);
static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
				pgoff_t ilx, int *nid);

static bool strictly_unmovable(unsigned long flags)
{
	/*
	 * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO
	 * if any misplaced page is found.
	 */
	return (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ==
			 MPOL_MF_STRICT;
}

struct migration_mpol {		/* for alloc_migration_target_by_mpol() */
	struct mempolicy *pol;
	pgoff_t ilx;
};

struct queue_pages {
	struct list_head *pagelist;
	unsigned long flags;
	nodemask_t *nmask;
	unsigned long start;
	unsigned long end;
	struct vm_area_struct *first;
	struct folio *large;		/* note last large folio encountered */
	long nr_failed;			/* could not be isolated at this time */
};

/*
 * Check if the folio's nid is in qp->nmask.
 *
 * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
 * in the complement of qp->nmask.
 */
static inline bool queue_folio_required(struct folio *folio,
					struct queue_pages *qp)
{
	int nid = folio_nid(folio);
	unsigned long flags = qp->flags;

	return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}

static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
{
	struct folio *folio;
	struct queue_pages *qp = walk->private;

	if (unlikely(pmd_is_migration_entry(*pmd))) {
		qp->nr_failed++;
		return;
	}
	folio = pmd_folio(*pmd);
	if (is_huge_zero_folio(folio)) {
		walk->action = ACTION_CONTINUE;
		return;
	}
	if (!queue_folio_required(folio, qp))
		return;
	if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
	    !vma_migratable(walk->vma) ||
	    !migrate_folio_add(folio, qp->pagelist, qp->flags))
		qp->nr_failed++;
}

/*
 * Scan through folios, checking if they satisfy the required conditions,
 * moving them from the LRU to a local pagelist for migration if they do.
 *
 * queue_folios_pte_range() has two possible return values:
 * 0 - continue walking to scan for more, even if an existing folio on the
 *     wrong node could not be isolated and queued for migration.
 * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL,
 *        and an existing folio was on a node that does not follow the policy.
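 *
 * For example (illustrative): with MPOL_MF_STRICT alone, the first
 * misplaced folio stops the walk with -EIO; adding MPOL_MF_MOVE keeps
 * the walk going and queues misplaced folios on qp->pagelist instead.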
*/ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; int max_nr, nr; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { queue_folios_pmd(pmd, walk); spin_unlock(ptl); goto out; } mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte += nr, addr += nr * PAGE_SIZE) { max_nr = (end - addr) >> PAGE_SHIFT; nr = 1; ptent = ptep_get(pte); if (pte_none(ptent)) continue; if (!pte_present(ptent)) { const softleaf_t entry = softleaf_from_pte(ptent); if (softleaf_is_migration(entry)) qp->nr_failed++; continue; } folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; if (folio_test_large(folio) && max_nr != 1) nr = folio_pte_batch(folio, pte, ptent, max_nr); /* * vm_normal_folio() filters out zero pages, but there might * still be reserved folios to skip, perhaps in a VDSO. */ if (folio_test_reserved(folio)) continue; if (!queue_folio_required(folio, qp)) continue; if (folio_test_large(folio)) { /* * A large folio can only be isolated from LRU once, * but may be mapped by many PTEs (and Copy-On-Write may * intersperse PTEs of other, order 0, folios). This is * a common case, so don't mistake it for failure (but * there can be other cases of multi-mapped pages which * this quick check does not help to filter out - and a * search of the pagelist might grow to be prohibitive). * * migrate_pages(&pagelist) returns nr_failed folios, so * check "large" now so that queue_pages_range() returns * a comparable nr_failed folios. This does imply that * if folio could not be isolated for some racy reason * at its first PTE, later PTEs will not give it another * chance of isolation; but keeps the accounting simple. */ if (folio == qp->large) continue; qp->large = folio; } if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(vma) || !migrate_folio_add(folio, qp->pagelist, flags)) { qp->nr_failed += nr; if (strictly_unmovable(flags)) break; } } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); out: if (qp->nr_failed && strictly_unmovable(flags)) return -EIO; return 0; } static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; struct folio *folio; spinlock_t *ptl; pte_t ptep; ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); ptep = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(ptep)) { if (!huge_pte_none(ptep)) { const softleaf_t entry = softleaf_from_pte(ptep); if (unlikely(softleaf_is_migration(entry))) qp->nr_failed++; } goto unlock; } folio = pfn_folio(pte_pfn(ptep)); if (!queue_folio_required(folio, qp)) goto unlock; if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(walk->vma)) { qp->nr_failed++; goto unlock; } /* * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * * See folio_maybe_mapped_shared() on possible imprecision when we * cannot easily detect if a folio is shared. 
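 *
 * (Illustrative: a hugetlb folio mapped by several unrelated processes
 * is left in place under MPOL_MF_MOVE, but is still isolated under
 * MPOL_MF_MOVE_ALL, which the mbind()/migrate_pages() paths below only
 * permit with CAP_SYS_NICE.)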
	 */
	if ((flags & MPOL_MF_MOVE_ALL) ||
	    (!folio_maybe_mapped_shared(folio) && !hugetlb_pmd_shared(pte)))
		if (!folio_isolate_hugetlb(folio, qp->pagelist))
			qp->nr_failed++;
unlock:
	spin_unlock(ptl);
	if (qp->nr_failed && strictly_unmovable(flags))
		return -EIO;
#endif
	return 0;
}

#ifdef CONFIG_NUMA_BALANCING
/**
 * folio_can_map_prot_numa() - check whether the folio can map prot numa
 * @folio: The folio whose mapping is considered for being made NUMA hintable
 * @vma: The VMA that the folio belongs to.
 * @is_private_single_threaded: Is this a single-threaded private VMA or not
 *
 * This function checks to see if the folio actually indicates that
 * we need to make the mapping one which causes a NUMA hinting fault,
 * as there are cases where it's simply unnecessary, and the folio's
 * access time is adjusted for memory tiering if prot numa is needed.
 *
 * Return: True if the mapping of the folio needs to be changed, false
 * otherwise.
 */
bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
			     bool is_private_single_threaded)
{
	int nid;

	if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
		return false;

	/* Also skip shared copy-on-write folios */
	if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
		return false;

	/* Folios are pinned and can't be migrated */
	if (folio_maybe_dma_pinned(folio))
		return false;

	/*
	 * While migration can move some dirty folios,
	 * it cannot move them all from MIGRATE_ASYNC
	 * context.
	 */
	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
		return false;

	/*
	 * Don't mess with PTEs if folio is already on the node
	 * a single-threaded process is running on.
	 */
	nid = folio_nid(folio);
	if (is_private_single_threaded && (nid == numa_node_id()))
		return false;

	/*
	 * Skip scanning top tier node if normal numa
	 * balancing is disabled
	 */
	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
	    node_is_toptier(nid))
		return false;

	if (folio_use_access_time(folio))
		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));

	return true;
}

/*
 * This is used to mark a range of virtual addresses to be inaccessible.
 * These are later cleared by a NUMA hinting fault. Depending on these
 * faults, pages may be migrated for better NUMA placement.
 *
 * This is assuming that NUMA faults are handled using PROT_NONE. If
 * an architecture makes a different choice, it will need further
 * changes to the core.
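 *
 * A minimal sketch of the intended use (hypothetical NUMA-balancing scan
 * step; names are illustrative):
 *
 *	nr = change_prot_numa(vma, start, end);
 *	if (nr)
 *		...that many PTEs now trigger NUMA hinting faults...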
 */
unsigned long change_prot_numa(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
	struct mmu_gather tlb;
	long nr_updated;

	tlb_gather_mmu(&tlb, vma->vm_mm);

	nr_updated = change_protection(&tlb, vma, addr, end, MM_CP_PROT_NUMA);
	if (nr_updated > 0) {
		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
		count_memcg_events_mm(vma->vm_mm, NUMA_PTE_UPDATES, nr_updated);
	}

	tlb_finish_mmu(&tlb);

	return nr_updated;
}
#endif /* CONFIG_NUMA_BALANCING */

static int queue_pages_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct vm_area_struct *next, *vma = walk->vma;
	struct queue_pages *qp = walk->private;
	unsigned long flags = qp->flags;

	/* range check first */
	VM_BUG_ON_VMA(!range_in_vma(vma, start, end), vma);

	if (!qp->first) {
		qp->first = vma;
		if (!(flags & MPOL_MF_DISCONTIG_OK) &&
				(qp->start < vma->vm_start))
			/* hole at head side of range */
			return -EFAULT;
	}
	next = find_vma(vma->vm_mm, vma->vm_end);
	if (!(flags & MPOL_MF_DISCONTIG_OK) &&
		((vma->vm_end < qp->end) &&
		(!next || vma->vm_end < next->vm_start)))
		/* hole at middle or tail of range */
		return -EFAULT;

	/*
	 * We need to check MPOL_MF_STRICT so that -EIO can be returned
	 * if needed, regardless of vma_migratable().
	 */
	if (!vma_migratable(vma) &&
	    !(flags & MPOL_MF_STRICT))
		return 1;

	/*
	 * Check page nodes, and queue pages to move, in the current vma.
	 * But if there is no moving and no strict checking, the scan can
	 * be skipped.
	 */
	if (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		return 0;
	return 1;
}

static const struct mm_walk_ops queue_pages_walk_ops = {
	.hugetlb_entry		= queue_folios_hugetlb,
	.pmd_entry		= queue_folios_pte_range,
	.test_walk		= queue_pages_test_walk,
	.walk_lock		= PGWALK_RDLOCK,
};

static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
	.hugetlb_entry		= queue_folios_hugetlb,
	.pmd_entry		= queue_folios_pte_range,
	.test_walk		= queue_pages_test_walk,
	.walk_lock		= PGWALK_WRLOCK,
};

/*
 * Walk through page tables and collect pages to be migrated.
 *
 * If pages found in a given range are not on the required set of @nodes,
 * and migration is allowed, they are isolated and queued to @pagelist.
 *
 * queue_pages_range() may return:
 * 0 - all pages already on the right node, or successfully queued for moving
 *     (or neither strict checking nor moving requested: only range checking).
 * >0 - this number of misplaced folios could not be queued for moving
 *      (a hugetlbfs page or a transparent huge page being counted as 1).
 * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
 * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
 */
static long queue_pages_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, nodemask_t *nodes,
		unsigned long flags, struct list_head *pagelist)
{
	int err;
	struct queue_pages qp = {
		.pagelist = pagelist,
		.flags = flags,
		.nmask = nodes,
		.start = start,
		.end = end,
		.first = NULL,
	};
	const struct mm_walk_ops *ops = (flags & MPOL_MF_WRLOCK) ?
			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;

	err = walk_page_range(mm, start, end, ops, &qp);

	if (!qp.first)
		/* whole range in hole */
		err = -EFAULT;

	return err ? : qp.nr_failed;
}

/*
 * Apply policy to a single VMA.
 * This must be called with the mmap_lock held for writing.
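 *
 * Simplified call pattern (a sketch of what mbind_range() below does;
 * error handling omitted):
 *
 *	mmap_write_lock(mm);
 *	vma = vma_modify_policy(vmi, prev, vma, vmstart, vmend, new_pol);
 *	err = vma_replace_policy(vma, new_pol);
 *	mmap_write_unlock(mm);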
*/ static int vma_replace_policy(struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct mempolicy *old; struct mempolicy *new; vma_assert_write_locked(vma); new = mpol_dup(pol); if (IS_ERR(new)) return PTR_ERR(new); if (vma->vm_ops && vma->vm_ops->set_policy) { err = vma->vm_ops->set_policy(vma, new); if (err) goto err_out; } old = vma->vm_policy; vma->vm_policy = new; /* protected by mmap_lock */ mpol_put(old); return 0; err_out: mpol_put(new); return err; } /* Split or merge the VMA (if required) and apply the new policy */ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, struct mempolicy *new_pol) { unsigned long vmstart, vmend; vmend = min(end, vma->vm_end); if (start > vma->vm_start) { *prev = vma; vmstart = start; } else { vmstart = vma->vm_start; } if (mpol_equal(vma->vm_policy, new_pol)) { *prev = vma; return 0; } vma = vma_modify_policy(vmi, *prev, vma, vmstart, vmend, new_pol); if (IS_ERR(vma)) return PTR_ERR(vma); *prev = vma; return vma_replace_policy(vma, new_pol); } /* Set the process memory policy */ static long do_set_mempolicy(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *new, *old; NODEMASK_SCRATCH(scratch); int ret; if (!scratch) return -ENOMEM; new = mpol_new(mode, flags, nodes); if (IS_ERR(new)) { ret = PTR_ERR(new); goto out; } task_lock(current); ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { task_unlock(current); mpol_put(new); goto out; } old = current->mempolicy; current->mempolicy = new; if (new && (new->mode == MPOL_INTERLEAVE || new->mode == MPOL_WEIGHTED_INTERLEAVE)) { current->il_prev = MAX_NUMNODES-1; current->il_weight = 0; } task_unlock(current); mpol_put(old); ret = 0; out: NODEMASK_SCRATCH_FREE(scratch); return ret; } /* * Return nodemask for policy for get_mempolicy() query * * Called with task's alloc_lock held */ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes) { nodes_clear(*nodes); if (pol == &default_policy) return; switch (pol->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: *nodes = pol->nodes; break; case MPOL_LOCAL: /* return empty node mask for local allocation */ break; default: BUG(); } } static int lookup_node(struct mm_struct *mm, unsigned long addr) { struct page *p = NULL; int ret; ret = get_user_pages_fast(addr & PAGE_MASK, 1, 0, &p); if (ret > 0) { ret = page_to_nid(p); put_page(p); } return ret; } /* Retrieve NUMA policy */ static long do_get_mempolicy(int *policy, nodemask_t *nmask, unsigned long addr, unsigned long flags) { int err; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) return -EINVAL; if (flags & MPOL_F_MEMS_ALLOWED) { if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; *policy = 0; /* just so it's initialized */ task_lock(current); *nmask = cpuset_current_mems_allowed; task_unlock(current); return 0; } if (flags & MPOL_F_ADDR) { pgoff_t ilx; /* ignored here */ /* * Do NOT fall back to task policy if the * vma/shared policy at addr is NULL. We * want to return MPOL_DEFAULT in this case. 
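		 *
		 * (Example, illustrative: an MPOL_F_ADDR query on an address
		 * whose vma has no vm_policy reports MPOL_DEFAULT even when
		 * the calling task's policy is, say, MPOL_BIND.)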
		 */
		mmap_read_lock(mm);
		vma = vma_lookup(mm, addr);
		if (!vma) {
			mmap_read_unlock(mm);
			return -EFAULT;
		}
		pol = __get_vma_policy(vma, addr, &ilx);
	} else if (addr)
		return -EINVAL;

	if (!pol)
		pol = &default_policy;	/* indicates default behavior */

	if (flags & MPOL_F_NODE) {
		if (flags & MPOL_F_ADDR) {
			/*
			 * Take a refcount on the mpol, because we are about to
			 * drop the mmap_lock, after which only "pol" remains
			 * valid, "vma" is stale.
			 */
			pol_refcount = pol;
			vma = NULL;
			mpol_get(pol);
			mmap_read_unlock(mm);
			err = lookup_node(mm, addr);
			if (err < 0)
				goto out;
			*policy = err;
		} else if (pol == current->mempolicy &&
				pol->mode == MPOL_INTERLEAVE) {
			*policy = next_node_in(current->il_prev, pol->nodes);
		} else if (pol == current->mempolicy &&
				pol->mode == MPOL_WEIGHTED_INTERLEAVE) {
			if (current->il_weight)
				*policy = current->il_prev;
			else
				*policy = next_node_in(current->il_prev,
						       pol->nodes);
		} else {
			err = -EINVAL;
			goto out;
		}
	} else {
		*policy = pol == &default_policy ? MPOL_DEFAULT :
						pol->mode;
		/*
		 * Internal mempolicy flags must be masked off before exposing
		 * the policy to userspace.
		 */
		*policy |= (pol->flags & MPOL_MODE_FLAGS);
	}

	err = 0;
	if (nmask) {
		if (mpol_store_user_nodemask(pol)) {
			*nmask = pol->w.user_nodemask;
		} else {
			task_lock(current);
			get_policy_nodemask(pol, nmask);
			task_unlock(current);
		}
	}

 out:
	mpol_cond_put(pol);
	if (vma)
		mmap_read_unlock(mm);
	if (pol_refcount)
		mpol_put(pol_refcount);
	return err;
}

#ifdef CONFIG_MIGRATION
static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
				unsigned long flags)
{
	/*
	 * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
	 * Choosing not to migrate a shared folio is not counted as a failure.
	 *
	 * See folio_maybe_mapped_shared() on possible imprecision when we
	 * cannot easily detect if a folio is shared.
	 */
	if ((flags & MPOL_MF_MOVE_ALL) || !folio_maybe_mapped_shared(folio)) {
		if (folio_isolate_lru(folio)) {
			list_add_tail(&folio->lru, foliolist);
			node_stat_mod_folio(folio,
				NR_ISOLATED_ANON + folio_is_file_lru(folio),
				folio_nr_pages(folio));
		} else {
			/*
			 * Non-movable folio may reach here. And, there may be
			 * folios temporarily off the LRU, or non-LRU movable
			 * folios. Treat them as unmovable folios since they
			 * can't be isolated, so they can't be moved at the
			 * moment.
			 */
			return false;
		}
	}
	return true;
}

/*
 * Migrate pages from one node to a target node.
 * Returns error or the number of pages not migrated.
 */
static long migrate_to_node(struct mm_struct *mm, int source, int dest,
			    int flags)
{
	nodemask_t nmask;
	struct vm_area_struct *vma;
	LIST_HEAD(pagelist);
	long nr_failed;
	long err = 0;
	struct migration_target_control mtc = {
		.nid = dest,
		.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
		.reason = MR_SYSCALL,
	};

	nodes_clear(nmask);
	node_set(source, nmask);

	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));

	mmap_read_lock(mm);
	vma = find_vma(mm, 0);
	if (unlikely(!vma)) {
		mmap_read_unlock(mm);
		return 0;
	}

	/*
	 * This does not migrate the range, but isolates all pages that
	 * need migration.  Between passing in the full user address
	 * space range and MPOL_MF_DISCONTIG_OK, this call cannot fail,
	 * but passes back the count of pages which could not be isolated.
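	 *
	 * (Illustrative: a fully migratable address space yields
	 * nr_failed == 0 here; folios that raced with isolation are merely
	 * counted rather than failing the whole call.)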
	 */
	nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
				      flags | MPOL_MF_DISCONTIG_OK, &pagelist);
	mmap_read_unlock(mm);

	if (!list_empty(&pagelist)) {
		err = migrate_pages(&pagelist, alloc_migration_target, NULL,
			(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
		if (err)
			putback_movable_pages(&pagelist);
	}

	if (err >= 0)
		err += nr_failed;
	return err;
}

/*
 * Move pages between the two nodesets so as to preserve the physical
 * layout as much as possible.
 *
 * Returns the number of pages that could not be moved.
 */
int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
		     const nodemask_t *to, int flags)
{
	long nr_failed = 0;
	long err = 0;
	nodemask_t tmp;

	lru_cache_disable();

	/*
	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
	 * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
	 * bit in 'tmp', and return that <source, dest> pair for migration.
	 * The pair of nodemasks 'to' and 'from' define the map.
	 *
	 * If no pair of bits is found that way, fall back to picking some
	 * pair of 'source' and 'dest' bits that are not the same.  If the
	 * 'source' and 'dest' bits are the same, this represents a node
	 * that will be migrating to itself, so no pages need move.
	 *
	 * If no bits are left in 'tmp', or if all remaining bits left
	 * in 'tmp' correspond to the same bit in 'to', return false
	 * (nothing left to migrate).
	 *
	 * This lets us pick a pair of nodes to migrate between, such that
	 * if possible the dest node is not already occupied by some other
	 * source node, minimizing the risk of overloading the memory on a
	 * node that would happen if we migrated incoming memory to a node
	 * before migrating outgoing memory from that same node.
	 *
	 * A single scan of tmp is sufficient.  As we go, we remember the
	 * most recent <s, d> pair that moved (s != d).  If we find a pair
	 * that not only moved, but what's better, moved to an empty slot
	 * (d is not set in tmp), then we break out then, with that pair.
	 * Otherwise when we finish scanning tmp, we at least have the
	 * most recent <s, d> pair that moved.  If we get all the way through
	 * the scan of tmp without finding any node that moved, much less
	 * moved to an empty node, then there is nothing left worth migrating.
	 */

	tmp = *from;
	while (!nodes_empty(tmp)) {
		int s, d;
		int source = NUMA_NO_NODE;
		int dest = 0;

		for_each_node_mask(s, tmp) {

			/*
			 * do_migrate_pages() tries to maintain the relative
			 * node relationship of the pages established between
			 * threads and memory areas.
			 *
			 * However if the number of source nodes is not equal to
			 * the number of destination nodes we can not preserve
			 * this node relative relationship.  In that case, skip
			 * copying memory from a node that is in the destination
			 * mask.
			 *
			 * Example: [2,3,4] -> [3,4,5] moves everything.
			 *          [0-7] -> [3,4,5] moves only 0,1,2,6,7.
			 */
			if ((nodes_weight(*from) != nodes_weight(*to)) &&
						(node_isset(s, *to)))
				continue;

			d = node_remap(s, *from, *to);
			if (s == d)
				continue;

			source = s;	/* Node moved. Memorize */
			dest = d;

			/* dest not in remaining from nodes? */
			if (!node_isset(dest, tmp))
				break;
		}
		if (source == NUMA_NO_NODE)
			break;

		node_clear(source, tmp);
		err = migrate_to_node(mm, source, dest, flags);
		if (err > 0)
			nr_failed += err;
		if (err < 0)
			break;
	}

	lru_cache_enable();
	if (err < 0)
		return err;
	return (nr_failed < INT_MAX) ? nr_failed : INT_MAX;
}

/*
 * Allocate a new folio for page migration, according to NUMA mempolicy.
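 *
 * This is the get_new_folio callback that do_mbind() passes to
 * migrate_pages(), with a struct migration_mpol as @private.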
*/ static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { struct migration_mpol *mmpol = (struct migration_mpol *)private; struct mempolicy *pol = mmpol->pol; pgoff_t ilx = mmpol->ilx; unsigned int order; int nid = numa_node_id(); gfp_t gfp; order = folio_order(src); ilx += src->index >> order; if (folio_test_hugetlb(src)) { nodemask_t *nodemask; struct hstate *h; h = folio_hstate(src); gfp = htlb_alloc_mask(h); nodemask = policy_nodemask(gfp, pol, ilx, &nid); return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp, htlb_allow_alloc_fallback(MR_MEMPOLICY_MBIND)); } if (folio_test_large(src)) gfp = GFP_TRANSHUGE; else gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP; return folio_alloc_mpol(gfp, order, pol, ilx, nid); } #else static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { return false; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return -ENOSYS; } static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { return NULL; } #endif static long do_mbind(unsigned long start, unsigned long len, unsigned short mode, unsigned short mode_flags, nodemask_t *nmask, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct vma_iterator vmi; struct migration_mpol mmpol; struct mempolicy *new; unsigned long end; long err; long nr_failed; LIST_HEAD(pagelist); if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; if (start & ~PAGE_MASK) return -EINVAL; if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = PAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; new = mpol_new(mode, mode_flags, nmask); if (IS_ERR(new)) return PTR_ERR(new); /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all */ if (!new) flags |= MPOL_MF_DISCONTIG_OK; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) lru_cache_disable(); { NODEMASK_SCRATCH(scratch); if (scratch) { mmap_write_lock(mm); err = mpol_set_nodemask(new, nmask, scratch); if (err) mmap_write_unlock(mm); } else err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } if (err) goto mpol_out; /* * Lock the VMAs before scanning for pages to migrate, * to ensure we don't miss a concurrently inserted page. */ nr_failed = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist); if (nr_failed < 0) { err = nr_failed; nr_failed = 0; } else { vma_iter_init(&vmi, mm, start); prev = vma_prev(&vmi); for_each_vma_range(vmi, vma, end) { err = mbind_range(&vmi, vma, &prev, start, end, new); if (err) break; } } if (!err && !list_empty(&pagelist)) { /* Convert MPOL_DEFAULT's NULL to task or default policy */ if (!new) { new = get_task_policy(current); mpol_get(new); } mmpol.pol = new; mmpol.ilx = 0; /* * In the interleaved case, attempt to allocate on exactly the * targeted nodes, for the first VMA to be migrated; for later * VMAs, the nodes will still be interleaved from the targeted * nodemask, but one by one may be selected differently. 
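		 *
		 * (Sketch of the effect: mmpol.ilx is rebased below so that
		 * the first migrated folio's index maps onto the interleave
		 * slot its vma position implies; later folios then advance
		 * from that slot.)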
		 */
		if (new->mode == MPOL_INTERLEAVE ||
		    new->mode == MPOL_WEIGHTED_INTERLEAVE) {
			struct folio *folio;
			unsigned int order;
			unsigned long addr = -EFAULT;

			list_for_each_entry(folio, &pagelist, lru) {
				if (!folio_test_ksm(folio))
					break;
			}
			if (!list_entry_is_head(folio, &pagelist, lru)) {
				vma_iter_init(&vmi, mm, start);
				for_each_vma_range(vmi, vma, end) {
					addr = page_address_in_vma(folio,
						folio_page(folio, 0), vma);
					if (addr != -EFAULT)
						break;
				}
			}
			if (addr != -EFAULT) {
				order = folio_order(folio);
				/* We already know the pol, but not the ilx */
				mpol_cond_put(get_vma_policy(vma, addr, order,
							     &mmpol.ilx));
				/* Set base from which to increment by index */
				mmpol.ilx -= folio->index >> order;
			}
		}
	}

	mmap_write_unlock(mm);

	if (!err && !list_empty(&pagelist)) {
		nr_failed |= migrate_pages(&pagelist,
				alloc_migration_target_by_mpol, NULL,
				(unsigned long)&mmpol, MIGRATE_SYNC,
				MR_MEMPOLICY_MBIND, NULL);
	}

	if (nr_failed && (flags & MPOL_MF_STRICT))
		err = -EIO;
	if (!list_empty(&pagelist))
		putback_movable_pages(&pagelist);
mpol_out:
	mpol_put(new);
	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		lru_cache_enable();
	return err;
}

/*
 * User space interface with variable sized bitmaps for nodelists.
 */
static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask,
		      unsigned long maxnode)
{
	unsigned long nlongs = BITS_TO_LONGS(maxnode);
	int ret;

	if (in_compat_syscall())
		ret = compat_get_bitmap(mask,
					(const compat_ulong_t __user *)nmask,
					maxnode);
	else
		ret = copy_from_user(mask, nmask,
				     nlongs * sizeof(unsigned long));

	if (ret)
		return -EFAULT;

	if (maxnode % BITS_PER_LONG)
		mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1;

	return 0;
}

/* Copy a node mask from user space. */
static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
		     unsigned long maxnode)
{
	--maxnode;
	nodes_clear(*nodes);
	if (maxnode == 0 || !nmask)
		return 0;
	if (maxnode > PAGE_SIZE*BITS_PER_BYTE)
		return -EINVAL;

	/*
	 * When the user specifies more nodes than supported, just check
	 * if the unsupported part is all zero, one word at a time,
	 * starting at the end.
	 */
	while (maxnode > MAX_NUMNODES) {
		unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG);
		unsigned long t;

		if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits))
			return -EFAULT;

		if (maxnode - bits >= MAX_NUMNODES) {
			maxnode -= bits;
		} else {
			maxnode = MAX_NUMNODES;
			t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
		}
		if (t)
			return -EINVAL;
	}

	return get_bitmap(nodes_addr(*nodes), nmask, maxnode);
}

/* Copy a kernel node mask to user space */
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
			      nodemask_t *nodes)
{
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
	unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
	bool compat = in_compat_syscall();

	if (compat)
		nbytes = BITS_TO_COMPAT_LONGS(nr_node_ids) * sizeof(compat_long_t);

	if (copy > nbytes) {
		if (copy > PAGE_SIZE)
			return -EINVAL;
		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
			return -EFAULT;
		copy = nbytes;
		maxnode = nr_node_ids;
	}

	if (compat)
		return compat_put_bitmap((compat_ulong_t __user *)mask,
					 nodes_addr(*nodes), maxnode);

	return copy_to_user(mask, nodes_addr(*nodes), copy) ?
			-EFAULT : 0;
}

/* Basic parameter sanity check used by both mbind() and set_mempolicy() */
static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
{
	*flags = *mode & MPOL_MODE_FLAGS;
	*mode &= ~MPOL_MODE_FLAGS;

	if ((unsigned int)(*mode) >= MPOL_MAX)
		return -EINVAL;
	if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
		return -EINVAL;
	if (*flags & MPOL_F_NUMA_BALANCING) {
		if (*mode == MPOL_BIND || *mode == MPOL_PREFERRED_MANY)
			*flags |= (MPOL_F_MOF | MPOL_F_MORON);
		else
			return -EINVAL;
	}
	return 0;
}

static long kernel_mbind(unsigned long start, unsigned long len,
			 unsigned long mode, const unsigned long __user *nmask,
			 unsigned long maxnode, unsigned int flags)
{
	unsigned short mode_flags;
	nodemask_t nodes;
	int lmode = mode;
	int err;

	start = untagged_addr(start);
	err = sanitize_mpol_flags(&lmode, &mode_flags);
	if (err)
		return err;

	err = get_nodes(&nodes, nmask, maxnode);
	if (err)
		return err;

	return do_mbind(start, len, lmode, mode_flags, &nodes, flags);
}

SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len,
		unsigned long, home_node, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct mempolicy *new, *old;
	unsigned long end;
	int err = -ENOENT;
	VMA_ITERATOR(vmi, mm, start);

	start = untagged_addr(start);
	if (start & ~PAGE_MASK)
		return -EINVAL;
	/*
	 * flags is reserved for future extension; it must be zero for now.
	 */
	if (flags != 0)
		return -EINVAL;

	/*
	 * Check home_node is online to avoid accessing uninitialized
	 * NODE_DATA.
	 */
	if (home_node >= MAX_NUMNODES || !node_online(home_node))
		return -EINVAL;

	len = PAGE_ALIGN(len);
	end = start + len;

	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	mmap_write_lock(mm);
	prev = vma_prev(&vmi);
	for_each_vma_range(vmi, vma, end) {
		/*
		 * If any vma in the range has a policy other than MPOL_BIND
		 * or MPOL_PREFERRED_MANY, we return an error. We don't reset
		 * the home node for vmas we already updated before.
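		 *
		 * (Illustrative: if the second of three vmas in the range
		 * is, say, MPOL_INTERLEAVE, the first vma keeps its new
		 * home node and the syscall returns -EOPNOTSUPP.)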
*/ old = vma_policy(vma); if (!old) { prev = vma; continue; } if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { err = -EOPNOTSUPP; break; } new = mpol_dup(old); if (IS_ERR(new)) { err = PTR_ERR(new); break; } vma_start_write(vma); new->home_node = home_node; err = mbind_range(&vmi, vma, &prev, start, end, new); mpol_put(new); if (err) break; } mmap_write_unlock(mm); return err; } SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, unsigned long, mode, const unsigned long __user *, nmask, unsigned long, maxnode, unsigned int, flags) { return kernel_mbind(start, len, mode, nmask, maxnode, flags); } /* Set the process memory policy */ static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode) { unsigned short mode_flags; nodemask_t nodes; int lmode = mode; int err; err = sanitize_mpol_flags(&lmode, &mode_flags); if (err) return err; err = get_nodes(&nodes, nmask, maxnode); if (err) return err; return do_set_mempolicy(lmode, mode_flags, &nodes); } SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, unsigned long, maxnode) { return kernel_set_mempolicy(mode, nmask, maxnode); } static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { struct mm_struct *mm = NULL; struct task_struct *task; nodemask_t task_nodes; int err; nodemask_t *old; nodemask_t *new; NODEMASK_SCRATCH(scratch); if (!scratch) return -ENOMEM; old = &scratch->mask1; new = &scratch->mask2; err = get_nodes(old, old_nodes, maxnode); if (err) goto out; err = get_nodes(new, new_nodes, maxnode); if (err) goto out; /* Find the mm_struct */ rcu_read_lock(); task = pid ? find_task_by_vpid(pid) : current; if (!task) { rcu_read_unlock(); err = -ESRCH; goto out; } get_task_struct(task); err = -EINVAL; /* * Check if this process has the right to modify the specified process. * Use the regular "ptrace_may_access()" checks. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out_put; } rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out_put; } task_nodes = cpuset_mems_allowed(current); nodes_and(*new, *new, task_nodes); if (nodes_empty(*new)) goto out_put; err = security_task_movememory(task); if (err) goto out_put; mm = get_task_mm(task); put_task_struct(task); if (!mm) { err = -EINVAL; goto out; } err = do_migrate_pages(mm, old, new, capable(CAP_SYS_NICE) ? 
MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); mmput(mm); out: NODEMASK_SCRATCH_FREE(scratch); return err; out_put: put_task_struct(task); goto out; } SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, const unsigned long __user *, old_nodes, const unsigned long __user *, new_nodes) { return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes); } /* Retrieve NUMA policy */ static int kernel_get_mempolicy(int __user *policy, unsigned long __user *nmask, unsigned long maxnode, unsigned long addr, unsigned long flags) { int err; int pval; nodemask_t nodes; if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; addr = untagged_addr(addr); err = do_get_mempolicy(&pval, &nodes, addr, flags); if (err) return err; if (policy && put_user(pval, policy)) return -EFAULT; if (nmask) err = copy_nodes_to_user(nmask, maxnode, &nodes); return err; } SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long __user *, nmask, unsigned long, maxnode, unsigned long, addr, unsigned long, flags) { return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags); } bool vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; /* * DAX device mappings require predictable access latency, so avoid * incurring periodic faults. */ if (vma_is_dax(vma)) return false; if (is_vm_hugetlb_page(vma) && !hugepage_migration_supported(hstate_vma(vma))) return false; /* * Migration allocates pages in the highest zone. If we cannot * do so then migration (at least from node to node) is not * possible. */ if (vma->vm_file && gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) < policy_zone) return false; return true; } struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx) { *ilx = 0; return (vma->vm_ops && vma->vm_ops->get_policy) ? vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy; } /* * get_vma_policy(@vma, @addr, @order, @ilx) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup * @order: 0, or appropriate huge_page_order for interleaving * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE or * MPOL_WEIGHTED_INTERLEAVE * * Returns effective policy for a VMA at specified address. * Falls back to current->mempolicy or system default policy, as necessary. * Shared policies [those marked as MPOL_F_SHARED] require an extra reference * count--added by the get_policy() vm_op, as appropriate--to protect against * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. 
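 *
 * Typical call pattern (a sketch; vma_alloc_folio_noprof() below does
 * essentially this):
 *
 *	pol = get_vma_policy(vma, addr, order, &ilx);
 *	...allocate using pol and ilx...
 *	mpol_cond_put(pol);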
 */
struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
				 unsigned long addr, int order, pgoff_t *ilx)
{
	struct mempolicy *pol;

	pol = __get_vma_policy(vma, addr, ilx);
	if (!pol)
		pol = get_task_policy(current);
	if (pol->mode == MPOL_INTERLEAVE ||
	    pol->mode == MPOL_WEIGHTED_INTERLEAVE) {
		*ilx += vma->vm_pgoff >> order;
		*ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
	}
	return pol;
}

bool vma_policy_mof(struct vm_area_struct *vma)
{
	struct mempolicy *pol;

	if (vma->vm_ops && vma->vm_ops->get_policy) {
		bool ret = false;
		pgoff_t ilx;		/* ignored here */

		pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx);
		if (pol && (pol->flags & MPOL_F_MOF))
			ret = true;
		mpol_cond_put(pol);

		return ret;
	}

	pol = vma->vm_policy;
	if (!pol)
		pol = get_task_policy(current);

	return pol->flags & MPOL_F_MOF;
}

bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
{
	enum zone_type dynamic_policy_zone = policy_zone;

	BUG_ON(dynamic_policy_zone == ZONE_MOVABLE);

	/*
	 * If policy->nodes has movable memory only,
	 * we apply the policy only when gfp_zone(gfp) == ZONE_MOVABLE.
	 *
	 * policy->nodes is intersected with node_states[N_MEMORY],
	 * so if the following test fails, it implies that
	 * policy->nodes has movable memory only.
	 */
	if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY]))
		dynamic_policy_zone = ZONE_MOVABLE;

	return zone >= dynamic_policy_zone;
}

static unsigned int weighted_interleave_nodes(struct mempolicy *policy)
{
	unsigned int node;
	unsigned int cpuset_mems_cookie;

retry:
	/* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
	cpuset_mems_cookie = read_mems_allowed_begin();
	node = current->il_prev;
	if (!current->il_weight || !node_isset(node, policy->nodes)) {
		node = next_node_in(node, policy->nodes);
		if (read_mems_allowed_retry(cpuset_mems_cookie))
			goto retry;
		if (node == MAX_NUMNODES)
			return node;
		current->il_prev = node;
		current->il_weight = get_il_weight(node);
	}
	current->il_weight--;
	return node;
}

/* Do dynamic interleaving for a process */
static unsigned int interleave_nodes(struct mempolicy *policy)
{
	unsigned int nid;
	unsigned int cpuset_mems_cookie;

	/* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
	do {
		cpuset_mems_cookie = read_mems_allowed_begin();
		nid = next_node_in(current->il_prev, policy->nodes);
	} while (read_mems_allowed_retry(cpuset_mems_cookie));

	if (nid < MAX_NUMNODES)
		current->il_prev = nid;
	return nid;
}

/*
 * Depending on the memory policy provide a node from which to allocate the
 * next slab entry.
 */
unsigned int mempolicy_slab_node(void)
{
	struct mempolicy *policy;
	int node = numa_mem_id();

	if (!in_task())
		return node;

	policy = current->mempolicy;
	if (!policy)
		return node;

	switch (policy->mode) {
	case MPOL_PREFERRED:
		return first_node(policy->nodes);

	case MPOL_INTERLEAVE:
		return interleave_nodes(policy);

	case MPOL_WEIGHTED_INTERLEAVE:
		return weighted_interleave_nodes(policy);

	case MPOL_BIND:
	case MPOL_PREFERRED_MANY:
	{
		struct zoneref *z;

		/*
		 * Follow bind policy behavior and start allocation at the
		 * first node.
		 */
		struct zonelist *zonelist;
		enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
		zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
		z = first_zones_zonelist(zonelist, highest_zoneidx,
							&policy->nodes);
		return zonelist_zone(z) ? zonelist_node_idx(z) : node;
	}
	case MPOL_LOCAL:
		return node;

	default:
		BUG();
	}
}

static unsigned int read_once_policy_nodemask(struct mempolicy *pol,
					      nodemask_t *mask)
{
	/*
	 * barrier stabilizes the nodemask locally so that it can be iterated
	 * over safely without concern for changes.
Allocators validate node * selection does not violate mems_allowed, so this is safe. */ barrier(); memcpy(mask, &pol->nodes, sizeof(nodemask_t)); barrier(); return nodes_weight(*mask); } static unsigned int weighted_interleave_nid(struct mempolicy *pol, pgoff_t ilx) { struct weighted_interleave_state *state; nodemask_t nodemask; unsigned int target, nr_nodes; u8 *table = NULL; unsigned int weight_total = 0; u8 weight; int nid = 0; nr_nodes = read_once_policy_nodemask(pol, &nodemask); if (!nr_nodes) return numa_node_id(); rcu_read_lock(); state = rcu_dereference(wi_state); /* Uninitialized wi_state means we should assume all weights are 1 */ if (state) table = state->iw_table; /* calculate the total weight */ for_each_node_mask(nid, nodemask) weight_total += table ? table[nid] : 1; /* Calculate the node offset based on totals */ target = ilx % weight_total; nid = first_node(nodemask); while (target) { /* detect system default usage */ weight = table ? table[nid] : 1; if (target < weight) break; target -= weight; nid = next_node_in(nid, nodemask); } rcu_read_unlock(); return nid; } /* * Do static interleaving for interleave index @ilx. Returns the ilx'th * node in pol->nodes (starting from ilx=0), wrapping around if ilx * exceeds the number of present nodes. */ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx) { nodemask_t nodemask; unsigned int target, nnodes; int i; int nid; nnodes = read_once_policy_nodemask(pol, &nodemask); if (!nnodes) return numa_node_id(); target = ilx % nnodes; nid = first_node(nodemask); for (i = 0; i < target; i++) nid = next_node(nid, nodemask); return nid; } /* * Return a nodemask representing a mempolicy for filtering nodes for * page allocation, together with preferred node id (or the input node id). */ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid) { nodemask_t *nodemask = NULL; switch (pol->mode) { case MPOL_PREFERRED: /* Override input node id */ *nid = first_node(pol->nodes); break; case MPOL_PREFERRED_MANY: nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; break; case MPOL_BIND: /* Restrict to nodemask (but not on lower zones) */ if (apply_policy_zone(pol, gfp_zone(gfp)) && cpuset_nodemask_valid_mems_allowed(&pol->nodes)) nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; /* * __GFP_THISNODE shouldn't even be used with the bind policy * because we might easily break the expectation to stay on the * requested node and not break the policy. */ WARN_ON_ONCE(gfp & __GFP_THISNODE); break; case MPOL_INTERLEAVE: /* Override input node id */ *nid = (ilx == NO_INTERLEAVE_INDEX) ? interleave_nodes(pol) : interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: *nid = (ilx == NO_INTERLEAVE_INDEX) ? weighted_interleave_nodes(pol) : weighted_interleave_nid(pol, ilx); break; } return nodemask; } #ifdef CONFIG_HUGETLBFS /* * huge_node(@vma, @addr, @gfp_flags, @mpol) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup and interleave policy * @gfp_flags: for requested zone * @mpol: pointer to mempolicy pointer for reference counted mempolicy * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy * * Returns a nid suitable for a huge page allocation and a pointer * to the struct mempolicy for conditional unref after allocation. * If the effective policy is 'bind' or 'prefer-many', returns a pointer * to the mempolicy's @nodemask for filtering the zonelist. 
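 *
 * A hedged usage sketch (simplified from hugetlb allocation; @h and the
 * surrounding error handling are illustrative):
 *
 *	struct mempolicy *mpol;
 *	nodemask_t *nodemask;
 *	int nid;
 *
 *	nid = huge_node(vma, addr, htlb_alloc_mask(h), &mpol, &nodemask);
 *	...allocate a huge folio from nid/nodemask...
 *	mpol_cond_put(mpol);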
 */
int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
		struct mempolicy **mpol, nodemask_t **nodemask)
{
	pgoff_t ilx;
	int nid;

	nid = numa_node_id();
	*mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx);
	*nodemask = policy_nodemask(gfp_flags, *mpol, ilx, &nid);
	return nid;
}

/*
 * init_nodemask_of_mempolicy
 *
 * If the current task's mempolicy is "default" [NULL], return 'false'
 * to indicate default policy. Otherwise, extract the policy nodemask
 * for 'bind' or 'interleave' policy into the argument nodemask, or
 * initialize the argument nodemask to contain the single node for
 * 'preferred' or 'local' policy and return 'true' to indicate presence
 * of non-default mempolicy.
 *
 * We don't bother with reference counting the mempolicy [mpol_get/put]
 * because the current task is examining its own mempolicy and a task's
 * mempolicy is only ever changed by the task itself.
 *
 * N.B., it is the caller's responsibility to free a returned nodemask.
 */
bool init_nodemask_of_mempolicy(nodemask_t *mask)
{
	struct mempolicy *mempolicy;

	if (!(mask && current->mempolicy))
		return false;

	task_lock(current);
	mempolicy = current->mempolicy;
	switch (mempolicy->mode) {
	case MPOL_PREFERRED:
	case MPOL_PREFERRED_MANY:
	case MPOL_BIND:
	case MPOL_INTERLEAVE:
	case MPOL_WEIGHTED_INTERLEAVE:
		*mask = mempolicy->nodes;
		break;

	case MPOL_LOCAL:
		init_nodemask_of_node(mask, numa_node_id());
		break;

	default:
		BUG();
	}
	task_unlock(current);

	return true;
}
#endif

/*
 * mempolicy_in_oom_domain
 *
 * If tsk's mempolicy is "bind", check for intersection between mask and
 * the policy nodemask. Otherwise, return true for all other policies
 * including "interleave", as a tsk with "interleave" policy may have
 * memory allocated from all nodes in system.
 *
 * Takes task_lock(tsk) to prevent freeing of its mempolicy.
 */
bool mempolicy_in_oom_domain(struct task_struct *tsk,
					const nodemask_t *mask)
{
	struct mempolicy *mempolicy;
	bool ret = true;

	if (!mask)
		return ret;

	task_lock(tsk);
	mempolicy = tsk->mempolicy;
	if (mempolicy && mempolicy->mode == MPOL_BIND)
		ret = nodes_intersects(mempolicy->nodes, *mask);
	task_unlock(tsk);

	return ret;
}

static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
						int nid, nodemask_t *nodemask)
{
	struct page *page;
	gfp_t preferred_gfp;

	/*
	 * This is a two pass approach. The first pass will only try the
	 * preferred nodes but skip the direct reclaim and allow the
	 * allocation to fail, while the second pass will try all the
	 * nodes in system.
	 */
	preferred_gfp = gfp | __GFP_NOWARN;
	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
	page = __alloc_frozen_pages_noprof(preferred_gfp, order, nid, nodemask);
	if (!page)
		page = __alloc_frozen_pages_noprof(gfp, order, nid, NULL);

	return page;
}

/**
 * alloc_pages_mpol - Allocate pages according to NUMA mempolicy.
 * @gfp: GFP flags.
 * @order: Order of the page allocation.
 * @pol: Pointer to the NUMA mempolicy.
 * @ilx: Index for interleave mempolicy (also distinguishes alloc_pages()).
 * @nid: Preferred node (usually numa_node_id() but @mpol may override it).
 *
 * Return: The page on success or NULL if allocation fails.
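 *
 * (Illustrative: callers with no natural file index, such as
 * alloc_frozen_pages_noprof() below, pass NO_INTERLEAVE_INDEX so that
 * interleaving advances the task's il_prev state; callers with a
 * vma-derived index get stable placement for a given offset.)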
 */
static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
				     struct mempolicy *pol, pgoff_t ilx,
				     int nid)
{
	nodemask_t *nodemask;
	struct page *page;

	nodemask = policy_nodemask(gfp, pol, ilx, &nid);

	if (pol->mode == MPOL_PREFERRED_MANY)
		return alloc_pages_preferred_many(gfp, order, nid, nodemask);

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    /* filter "hugepage" allocation, unless from alloc_pages() */
	    order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
		/*
		 * For hugepage allocation and non-interleave policy which
		 * allows the current node (or other explicitly preferred
		 * node) we only try to allocate from the current/preferred
		 * node and don't fall back to other nodes, as the cost of
		 * remote accesses would likely offset THP benefits.
		 *
		 * If the policy is interleave or does not allow the current
		 * node in its nodemask, we allocate the standard way.
		 */
		if (pol->mode != MPOL_INTERLEAVE &&
		    pol->mode != MPOL_WEIGHTED_INTERLEAVE &&
		    (!nodemask || node_isset(nid, *nodemask))) {
			/*
			 * First, try to allocate THP only on local node, but
			 * don't reclaim unnecessarily, just compact.
			 */
			page = __alloc_frozen_pages_noprof(
				gfp | __GFP_THISNODE | __GFP_NORETRY, order,
				nid, NULL);
			if (page || !(gfp & __GFP_DIRECT_RECLAIM))
				return page;
			/*
			 * If hugepage allocations are configured to always
			 * use synchronous compaction, or the vma has been
			 * madvised to prefer hugepage backing, retry allowing
			 * remote memory with both reclaim and compact as
			 * well.
			 */
		}
	}

	page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);

	if (unlikely(pol->mode == MPOL_INTERLEAVE ||
		     pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
		/* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */
		if (static_branch_likely(&vm_numa_stat_key) &&
		    page_to_nid(page) == nid) {
			preempt_disable();
			__count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
			preempt_enable();
		}
	}

	return page;
}

struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *pol, pgoff_t ilx, int nid)
{
	struct page *page = alloc_pages_mpol(gfp | __GFP_COMP, order,
					     pol, ilx, nid);

	if (!page)
		return NULL;

	set_page_refcounted(page);
	return page_rmappable_folio(page);
}

/**
 * vma_alloc_folio - Allocate a folio for a VMA.
 * @gfp: GFP flags.
 * @order: Order of the folio.
 * @vma: Pointer to VMA.
 * @addr: Virtual address of the allocation. Must be inside @vma.
 *
 * Allocate a folio for a specific address in @vma, using the appropriate
 * NUMA policy. The caller must hold the mmap_lock of the mm_struct of the
 * VMA to prevent it from going away. Should be used for all allocations
 * for folios that will be mapped into user space, excepting hugetlbfs, and
 * excepting where direct use of folio_alloc_mpol() is more appropriate.
 *
 * Return: The folio on success or NULL if allocation fails.
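 *
 * A minimal sketch (hypothetical fault-path caller, illustrative only):
 *
 *	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr);
 *	if (!folio)
 *		return VM_FAULT_OOM;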
*/ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol; pgoff_t ilx; struct folio *folio; if (vma->vm_flags & VM_DROPPABLE) gfp |= __GFP_NOWARN; pol = get_vma_policy(vma, addr, order, &ilx); folio = folio_alloc_mpol_noprof(gfp, order, pol, ilx, numa_node_id()); mpol_cond_put(pol); return folio; } EXPORT_SYMBOL(vma_alloc_folio_noprof); struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order) { struct mempolicy *pol = &default_policy; /* * No reference counting needed for current->mempolicy * nor system default_policy */ if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); return alloc_pages_mpol(gfp, order, pol, NO_INTERLEAVE_INDEX, numa_node_id()); } /** * alloc_pages - Allocate pages. * @gfp: GFP flags. * @order: Power of two of number of pages to allocate. * * Allocate 1 << @order contiguous pages. The physical address of the * first page is naturally aligned (eg an order-3 allocation will be aligned * to a multiple of 8 * PAGE_SIZE bytes). The NUMA policy of the current * process is honoured when in process context. * * Context: Can be called from any context, providing the appropriate GFP * flags are used. * Return: The page on success or NULL if allocation fails. */ struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order) { struct page *page = alloc_frozen_pages_noprof(gfp, order); if (page) set_page_refcounted(page); return page; } EXPORT_SYMBOL(alloc_pages_noprof); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return page_rmappable_folio(alloc_pages_noprof(gfp | __GFP_COMP, order)); } EXPORT_SYMBOL(folio_alloc_noprof); static unsigned long alloc_pages_bulk_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { int nodes; unsigned long nr_pages_per_node; int delta; int i; unsigned long nr_allocated; unsigned long total_allocated = 0; nodes = nodes_weight(pol->nodes); nr_pages_per_node = nr_pages / nodes; delta = nr_pages - nodes * nr_pages_per_node; for (i = 0; i < nodes; i++) { if (delta) { nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node + 1, page_array); delta--; } else { nr_allocated = alloc_pages_bulk_noprof(gfp, interleave_nodes(pol), NULL, nr_pages_per_node, page_array); } page_array += nr_allocated; total_allocated += nr_allocated; } return total_allocated; } static unsigned long alloc_pages_bulk_weighted_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { struct weighted_interleave_state *state; struct task_struct *me = current; unsigned int cpuset_mems_cookie; unsigned long total_allocated = 0; unsigned long nr_allocated = 0; unsigned long rounds; unsigned long node_pages, delta; u8 *weights, weight; unsigned int weight_total = 0; unsigned long rem_pages = nr_pages; nodemask_t nodes; int nnodes, node; int resume_node = MAX_NUMNODES - 1; u8 resume_weight = 0; int prev_node; int i; if (!nr_pages) return 0; /* read the nodes onto the stack, retry if done during rebind */ do { cpuset_mems_cookie = read_mems_allowed_begin(); nnodes = read_once_policy_nodemask(pol, &nodes); } while (read_mems_allowed_retry(cpuset_mems_cookie)); /* if the nodemask has become invalid, we cannot do anything */ if (!nnodes) return 0; /* Continue allocating from most recent node and adjust the nr_pages */ node = me->il_prev; weight = me->il_weight; if (weight && node_isset(node, nodes)) { node_pages = min(rem_pages, weight); 
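		/* First drain the weight remaining on the node we stopped at */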
		nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
						  page_array);
		page_array += nr_allocated;
		total_allocated += nr_allocated;
		/* if that's all the pages, no need to interleave */
		if (rem_pages <= weight) {
			me->il_weight -= rem_pages;
			return total_allocated;
		}
		/* Otherwise we adjust remaining pages, continue from there */
		rem_pages -= weight;
	}
	/* clear active weight in case of an allocation failure */
	me->il_weight = 0;
	prev_node = node;

	/* create a local copy of node weights to operate on outside rcu */
	weights = kzalloc(nr_node_ids, GFP_KERNEL);
	if (!weights)
		return total_allocated;

	rcu_read_lock();
	state = rcu_dereference(wi_state);
	if (state) {
		memcpy(weights, state->iw_table, nr_node_ids * sizeof(u8));
		rcu_read_unlock();
	} else {
		rcu_read_unlock();
		for (i = 0; i < nr_node_ids; i++)
			weights[i] = 1;
	}

	/* calculate total, detect system default usage */
	for_each_node_mask(node, nodes)
		weight_total += weights[node];

	/*
	 * Calculate rounds/partial rounds to minimize __alloc_pages_bulk calls.
	 * Track which node weighted interleave should resume from.
	 *
	 * if (rounds > 0) and (delta == 0), resume_node will always be
	 * the node following prev_node and its weight.
	 */
	rounds = rem_pages / weight_total;
	delta = rem_pages % weight_total;
	resume_node = next_node_in(prev_node, nodes);
	resume_weight = weights[resume_node];
	for (i = 0; i < nnodes; i++) {
		node = next_node_in(prev_node, nodes);
		weight = weights[node];
		node_pages = weight * rounds;
		/* If a delta exists, add this node's portion of the delta */
		if (delta > weight) {
			node_pages += weight;
			delta -= weight;
		} else if (delta) {
			/* when delta is depleted, resume from that node */
			node_pages += delta;
			resume_node = node;
			resume_weight = weight - delta;
			delta = 0;
		}
		/* node_pages can be 0 if an allocation fails and rounds == 0 */
		if (!node_pages)
			break;
		nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
						  page_array);
		page_array += nr_allocated;
		total_allocated += nr_allocated;
		if (total_allocated == nr_pages)
			break;
		prev_node = node;
	}
	me->il_prev = resume_node;
	me->il_weight = resume_weight;
	kfree(weights);
	return total_allocated;
}

static unsigned long alloc_pages_bulk_preferred_many(gfp_t gfp, int nid,
		struct mempolicy *pol, unsigned long nr_pages,
		struct page **page_array)
{
	gfp_t preferred_gfp;
	unsigned long nr_allocated = 0;

	preferred_gfp = gfp | __GFP_NOWARN;
	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);

	nr_allocated  = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes,
						nr_pages, page_array);

	if (nr_allocated < nr_pages)
		nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL,
				nr_pages - nr_allocated,
				page_array + nr_allocated);
	return nr_allocated;
}

/*
 * In some situations, such as vmalloc, bulk page allocation and the
 * mempolicy should be considered at the same time.
 *
 * This can accelerate memory allocation, especially for interleaved
 * allocations.
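 *
 * (Illustrative: a 1024-page vmalloc under a four-node MPOL_INTERLEAVE
 * policy turns into roughly four bulk calls of ~256 pages each, instead
 * of 1024 single-page allocations.)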
*/ unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; nodemask_t *nodemask; int nid; if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) return alloc_pages_bulk_interleave(gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) return alloc_pages_bulk_weighted_interleave( gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_bulk_preferred_many(gfp, numa_node_id(), pol, nr_pages, page_array); nid = numa_node_id(); nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid); return alloc_pages_bulk_noprof(gfp, nid, nodemask, nr_pages, page_array); } int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(src->vm_policy); if (IS_ERR(pol)) return PTR_ERR(pol); dst->vm_policy = pol; return 0; } /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy it is copying by calling mpol_rebind_policy() * with the mems_allowed returned by cpuset_mems_allowed(). This * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). * * current's mempolicy may be rebound by another task (the task that changes * the cpuset's mems), so we needn't do rebind work for the current task. */ /* Slow path of a mempolicy duplicate */ struct mempolicy *__mpol_dup(struct mempolicy *old) { struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); /* task's mempolicy is protected by alloc_lock */ if (old == current->mempolicy) { task_lock(current); *new = *old; task_unlock(current); } else *new = *old; if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(new, &mems); } atomic_set(&new->refcnt, 1); return new; } /* Slow path of a mempolicy comparison */ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (!a || !b) return false; if (a->mode != b->mode) return false; if (a->flags != b->flags) return false; if (a->home_node != b->home_node) return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; switch (a->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: return !!nodes_equal(a->nodes, b->nodes); case MPOL_LOCAL: return true; default: BUG(); return false; } } /* * Shared memory backing store policy support. * * Remember policies even when nobody has shared memory mapped. * The policies are kept in a Red-Black tree linked from the inode. * They are protected by the sp->lock rwlock, which should be held * for any accesses to the tree. */ /* * Lookup the first element intersecting start-end. Caller holds sp->lock for * reading or for writing. */ static struct sp_node *sp_lookup(struct shared_policy *sp, pgoff_t start, pgoff_t end) { struct rb_node *n = sp->root.rb_node; while (n) { struct sp_node *p = rb_entry(n, struct sp_node, nd); if (start >= p->end) n = n->rb_right; else if (end <= p->start) n = n->rb_left; else break; } if (!n) return NULL; for (;;) { struct sp_node *w = NULL; struct rb_node *prev = rb_prev(n); if (!prev) break; w = rb_entry(prev, struct sp_node, nd); if (w->end <= start) break; n = prev; } return rb_entry(n, struct sp_node, nd); } /* * Insert a new shared policy into the list. Caller holds sp->lock for * writing.
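 * (Ranges stored in the tree never overlap, so comparing the new range against each node's start/end is enough to pick a subtree; e.g. a new [4, 8) range descends left of [10, 12) and right of [0, 4).)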
*/ static void sp_insert(struct shared_policy *sp, struct sp_node *new) { struct rb_node **p = &sp->root.rb_node; struct rb_node *parent = NULL; struct sp_node *nd; while (*p) { parent = *p; nd = rb_entry(parent, struct sp_node, nd); if (new->start < nd->start) p = &(*p)->rb_left; else if (new->end > nd->end) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new->nd, parent, p); rb_insert_color(&new->nd, &sp->root); } /* Find shared policy intersecting idx */ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { struct mempolicy *pol = NULL; struct sp_node *sn; if (!sp->root.rb_node) return NULL; read_lock(&sp->lock); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } read_unlock(&sp->lock); return pol; } EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_lookup, "kvm"); static void sp_free(struct sp_node *n) { mpol_put(n->policy); kmem_cache_free(sn_cache, n); } /** * mpol_misplaced - check whether current folio node is valid in policy * * @folio: folio to be checked * @vmf: structure describing the fault * @addr: virtual address in @vma for shared policy lookup and interleave policy * * Lookup current policy node id for vma,addr and "compare to" folio's * node id. Policy determination "mimics" alloc_page_vma(). * Called from fault path where we know the vma and faulting address. * * Return: NUMA_NO_NODE if the page is in a node that is valid for this * policy, or a suitable node ID to allocate a replacement folio from. */ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long addr) { struct mempolicy *pol; pgoff_t ilx; struct zoneref *z; int curnid = folio_nid(folio); struct vm_area_struct *vma = vmf->vma; int thiscpu = raw_smp_processor_id(); int thisnid = numa_node_id(); int polnid = NUMA_NO_NODE; int ret = NUMA_NO_NODE; /* * Make sure ptl is held so that we don't preempt and we * have a stable smp processor id */ lockdep_assert_held(vmf->ptl); pol = get_vma_policy(vma, addr, folio_order(folio), &ilx); if (!(pol->flags & MPOL_F_MOF)) goto out; switch (pol->mode) { case MPOL_INTERLEAVE: polnid = interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: polnid = weighted_interleave_nid(pol, ilx); break; case MPOL_PREFERRED: if (node_isset(curnid, pol->nodes)) goto out; polnid = first_node(pol->nodes); break; case MPOL_LOCAL: polnid = numa_node_id(); break; case MPOL_BIND: case MPOL_PREFERRED_MANY: /* * Even though MPOL_PREFERRED_MANY can allocate pages outside * policy nodemask we don't allow numa migration to nodes * outside policy nodemask for now. This is done so that if we * want demotion to slow memory to happen, before allocating * from some DRAM node say 'x', we will end up using a * MPOL_PREFERRED_MANY mask excluding node 'x'. In such scenario * we should not promote to node 'x' from slow memory node. */ if (pol->flags & MPOL_F_MORON) { /* * Optimize placement among multiple nodes * via NUMA balancing */ if (node_isset(thisnid, pol->nodes)) break; goto out; } /* * use current page if in policy nodemask, * else select nearest allowed node, if any. * If no allowed nodes, use current [!misplaced]. 
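 * (Illustration: under MPOL_BIND to nodes {2,3}, a folio now on node 0 is reported against whichever of nodes 2 or 3 appears first in the faulting CPU node's zonelist, i.e. the nearest allowed node.)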
*/ if (node_isset(curnid, pol->nodes)) goto out; z = first_zones_zonelist( node_zonelist(thisnid, GFP_HIGHUSER), gfp_zone(GFP_HIGHUSER), &pol->nodes); polnid = zonelist_node_idx(z); break; default: BUG(); } /* Migrate the folio towards the node whose CPU is referencing it */ if (pol->flags & MPOL_F_MORON) { polnid = thisnid; if (!should_numa_migrate_memory(current, folio, curnid, thiscpu)) goto out; } if (curnid != polnid) ret = polnid; out: mpol_cond_put(pol); return ret; } /* * Drop the (possibly final) reference to task->mempolicy. It needs to be * dropped after task->mempolicy is set to NULL so that any allocation done as * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed * policy. */ void mpol_put_task_policy(struct task_struct *task) { struct mempolicy *pol; task_lock(task); pol = task->mempolicy; task->mempolicy = NULL; task_unlock(task); mpol_put(pol); } static void sp_delete(struct shared_policy *sp, struct sp_node *n) { rb_erase(&n->nd, &sp->root); sp_free(n); } static void sp_node_init(struct sp_node *node, unsigned long start, unsigned long end, struct mempolicy *pol) { node->start = start; node->end = end; node->policy = pol; } static struct sp_node *sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) { struct sp_node *n; struct mempolicy *newpol; n = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n) return NULL; newpol = mpol_dup(pol); if (IS_ERR(newpol)) { kmem_cache_free(sn_cache, n); return NULL; } newpol->flags |= MPOL_F_SHARED; sp_node_init(n, start, end, newpol); return n; } /* Replace a policy range. */ static int shared_policy_replace(struct shared_policy *sp, pgoff_t start, pgoff_t end, struct sp_node *new) { struct sp_node *n; struct sp_node *n_new = NULL; struct mempolicy *mpol_new = NULL; int ret = 0; restart: write_lock(&sp->lock); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { struct rb_node *next = rb_next(&n->nd); if (n->start >= start) { if (n->end <= end) sp_delete(sp, n); else n->start = end; } else { /* Old policy spanning whole new range. */ if (n->end > end) { if (!n_new) goto alloc_new; *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); sp_node_init(n_new, end, n->end, mpol_new); n->end = start; sp_insert(sp, n_new); n_new = NULL; mpol_new = NULL; break; } else n->end = start; } if (!next) break; n = rb_entry(next, struct sp_node, nd); } if (new) sp_insert(sp, new); write_unlock(&sp->lock); ret = 0; err_out: if (mpol_new) mpol_put(mpol_new); if (n_new) kmem_cache_free(sn_cache, n_new); return ret; alloc_new: write_unlock(&sp->lock); ret = -ENOMEM; n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n_new) goto err_out; mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; atomic_set(&mpol_new->refcnt, 1); goto restart; } /** * mpol_shared_policy_init - initialize shared policy for inode * @sp: pointer to inode shared policy * @mpol: struct mempolicy to install * * Install non-NULL @mpol in inode's shared policy rb-tree. * On entry, the current task has a reference on a non-NULL @mpol. * This must be released on exit. * This is called at get_inode() calls and we can use GFP_KERNEL. 
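 * (Typical caller: tmpfs inode setup, which hands over the mount's mpol so that each new file starts with a single policy node spanning the whole file; see the sp_alloc(0, MAX_LFS_FILESIZE >> PAGE_SHIFT, npol) call below.)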
*/ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ rwlock_init(&sp->lock); if (mpol) { struct sp_node *sn; struct mempolicy *npol; NODEMASK_SCRATCH(scratch); if (!scratch) goto put_mpol; /* contextualize the tmpfs mount point mempolicy to this file */ npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); if (IS_ERR(npol)) goto free_scratch; /* no valid nodemask intersection */ task_lock(current); ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch); task_unlock(current); if (ret) goto put_npol; /* alloc node covering entire file; adds ref to file's npol */ sn = sp_alloc(0, MAX_LFS_FILESIZE >> PAGE_SHIFT, npol); if (sn) sp_insert(sp, sn); put_npol: mpol_put(npol); /* drop initial ref on file's npol */ free_scratch: NODEMASK_SCRATCH_FREE(scratch); put_mpol: mpol_put(mpol); /* drop our incoming ref on sb mpol */ } } EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_init, "kvm"); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct sp_node *new = NULL; unsigned long sz = vma_pages(vma); if (pol) { new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol); if (!new) return -ENOMEM; } err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new); if (err && new) sp_free(new); return err; } EXPORT_SYMBOL_FOR_MODULES(mpol_set_shared_policy, "kvm"); /* Free a backing policy store on inode delete. */ void mpol_free_shared_policy(struct shared_policy *sp) { struct sp_node *n; struct rb_node *next; if (!sp->root.rb_node) return; write_lock(&sp->lock); next = rb_first(&sp->root); while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); sp_delete(sp, n); } write_unlock(&sp->lock); } EXPORT_SYMBOL_FOR_MODULES(mpol_free_shared_policy, "kvm"); #ifdef CONFIG_NUMA_BALANCING static int __initdata numabalancing_override; static void __init check_numabalancing_enable(void) { bool numabalancing_default = false; if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) numabalancing_default = true; /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ if (numabalancing_override) set_numabalancing_state(numabalancing_override == 1); if (num_online_nodes() > 1 && !numabalancing_override) { pr_info("%s automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl\n", numabalancing_default ? "Enabling" : "Disabling"); set_numabalancing_state(numabalancing_default); } } static int __init setup_numabalancing(char *str) { int ret = 0; if (!str) goto out; if (!strcmp(str, "enable")) { numabalancing_override = 1; ret = 1; } else if (!strcmp(str, "disable")) { numabalancing_override = -1; ret = 1; } out: if (!ret) pr_warn("Unable to parse numa_balancing=\n"); return ret; } __setup("numa_balancing=", setup_numabalancing); #else static inline void __init check_numabalancing_enable(void) { } #endif /* CONFIG_NUMA_BALANCING */ void __init numa_policy_init(void) { nodemask_t interleave_nodes; unsigned long largest = 0; int nid, prefer = 0; policy_cache = kmem_cache_create("numa_policy", sizeof(struct mempolicy), 0, SLAB_PANIC, NULL); sn_cache = kmem_cache_create("shared_policy_node", sizeof(struct sp_node), 0, SLAB_PANIC, NULL); for_each_node(nid) { preferred_node_policy[nid] = (struct mempolicy) { .refcnt = ATOMIC_INIT(1), .mode = MPOL_PREFERRED, .flags = MPOL_F_MOF | MPOL_F_MORON, .nodes = nodemask_of_node(nid), }; } /* * Set interleaving policy for system init. 
Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or * fall back to the largest node if they're all smaller. */ nodes_clear(interleave_nodes); for_each_node_state(nid, N_MEMORY) { unsigned long total_pages = node_present_pages(nid); /* Preserve the largest node */ if (largest < total_pages) { largest = total_pages; prefer = nid; } /* Interleave this node? */ if ((total_pages << PAGE_SHIFT) >= (16 << 20)) node_set(nid, interleave_nodes); } /* All too small, use the largest */ if (unlikely(nodes_empty(interleave_nodes))) node_set(prefer, interleave_nodes); if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) pr_err("%s: interleaving failed\n", __func__); check_numabalancing_enable(); } /* Reset policy of current process to default */ void numa_default_policy(void) { do_set_mempolicy(MPOL_DEFAULT, 0, NULL); } /* * Parse and format mempolicy from/to strings */ static const char * const policy_modes[] = { [MPOL_DEFAULT] = "default", [MPOL_PREFERRED] = "prefer", [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave", [MPOL_LOCAL] = "local", [MPOL_PREFERRED_MANY] = "prefer (many)", }; #ifdef CONFIG_TMPFS /** * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. * @str: string containing mempolicy to parse * @mpol: pointer to struct mempolicy pointer, returned on success. * * Format of input: * <mode>[=<flags>][:<nodelist>] * * Return: %0 on success, else %1 */ int mpol_parse_str(char *str, struct mempolicy **mpol) { struct mempolicy *new = NULL; unsigned short mode_flags; nodemask_t nodes; char *nodelist = strchr(str, ':'); char *flags = strchr(str, '='); int err = 1, mode; if (flags) *flags++ = '\0'; /* terminate mode string */ if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; if (nodelist_parse(nodelist, nodes)) goto out; if (!nodes_subset(nodes, node_states[N_MEMORY])) goto out; } else nodes_clear(nodes); mode = match_string(policy_modes, MPOL_MAX, str); if (mode < 0) goto out; switch (mode) { case MPOL_PREFERRED: /* * Insist on a nodelist of one node only, although later * we use first_node(nodes) to grab a single node, so here * nodelist (or nodes) cannot be empty. */ if (nodelist) { char *rest = nodelist; while (isdigit(*rest)) rest++; if (*rest) goto out; if (nodes_empty(nodes)) goto out; } break; case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: /* * Default to online nodes with memory if no nodelist */ if (!nodelist) nodes = node_states[N_MEMORY]; break; case MPOL_LOCAL: /* * Don't allow a nodelist; mpol_new() checks flags */ if (nodelist) goto out; break; case MPOL_DEFAULT: /* * Insist on an empty nodelist */ if (!nodelist) err = 0; goto out; case MPOL_PREFERRED_MANY: case MPOL_BIND: /* * Insist on a nodelist */ if (!nodelist) goto out; } mode_flags = 0; if (flags) { /* * Currently, we only support two mutually exclusive * mode flags. */ if (!strcmp(flags, "static")) mode_flags |= MPOL_F_STATIC_NODES; else if (!strcmp(flags, "relative")) mode_flags |= MPOL_F_RELATIVE_NODES; else goto out; } new = mpol_new(mode, mode_flags, &nodes); if (IS_ERR(new)) goto out; /* * Save nodes for mpol_to_str() to show the tmpfs mount options * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
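 * (e.g. a tmpfs mounted with "mpol=bind:0-1" should show up again as "mpol=bind:0-1" in /proc/mounts, which is why the parsed nodelist is saved on the new policy below.)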
*/ if (mode != MPOL_PREFERRED) { new->nodes = nodes; } else if (nodelist) { nodes_clear(new->nodes); node_set(first_node(nodes), new->nodes); } else { new->mode = MPOL_LOCAL; } /* * Save nodes for contextualization: this will be used to "clone" * the mempolicy in a specific context [cpuset] at a later time. */ new->w.user_nodemask = nodes; err = 0; out: /* Restore string for error message */ if (nodelist) *--nodelist = ':'; if (flags) *--flags = '='; if (!err) *mpol = new; return err; } #endif /* CONFIG_TMPFS */ /** * mpol_to_str - format a mempolicy structure for printing * @buffer: to contain formatted mempolicy string * @maxlen: length of @buffer * @pol: pointer to mempolicy to be formatted * * Convert @pol into a string. If @buffer is too short, truncate the string. * Recommend a @maxlen of at least 51 for the longest mode, "weighted * interleave", plus the longest flags, "relative|balancing", and to * display at least a few node ids. */ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { char *p = buffer; nodemask_t nodes = NODE_MASK_NONE; unsigned short mode = MPOL_DEFAULT; unsigned short flags = 0; if (pol && pol != &default_policy && !(pol >= &preferred_node_policy[0] && pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) { mode = pol->mode; flags = pol->flags; } switch (mode) { case MPOL_DEFAULT: case MPOL_LOCAL: break; case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: nodes = pol->nodes; break; default: WARN_ON_ONCE(1); snprintf(p, maxlen, "unknown"); return; } p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); /* * Static and relative are mutually exclusive. */ if (flags & MPOL_F_STATIC_NODES) p += snprintf(p, buffer + maxlen - p, "static"); else if (flags & MPOL_F_RELATIVE_NODES) p += snprintf(p, buffer + maxlen - p, "relative"); if (flags & MPOL_F_NUMA_BALANCING) { if (!is_power_of_2(flags & MPOL_MODE_FLAGS)) p += snprintf(p, buffer + maxlen - p, "|"); p += snprintf(p, buffer + maxlen - p, "balancing"); } } if (!nodes_empty(nodes)) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } #ifdef CONFIG_SYSFS struct iw_node_attr { struct kobj_attribute kobj_attr; int nid; }; struct sysfs_wi_group { struct kobject wi_kobj; struct mutex kobj_lock; struct iw_node_attr *nattrs[]; }; static struct sysfs_wi_group *wi_group; static ssize_t node_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct iw_node_attr *node_attr; u8 weight; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); weight = get_il_weight(node_attr->nid); return sysfs_emit(buf, "%d\n", weight); } static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL; struct iw_node_attr *node_attr; u8 weight = 0; int i; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); if (count == 0 || sysfs_streq(buf, "") || kstrtou8(buf, 0, &weight) || weight == 0) return -EINVAL; new_wi_state = kzalloc(struct_size(new_wi_state, iw_table, nr_node_ids), GFP_KERNEL); if (!new_wi_state) return -ENOMEM; mutex_lock(&wi_state_lock); old_wi_state = rcu_dereference_protected(wi_state, lockdep_is_held(&wi_state_lock)); if (old_wi_state) { memcpy(new_wi_state->iw_table, old_wi_state->iw_table, nr_node_ids * sizeof(u8)); } else { for (i = 0; i < nr_node_ids; i++) new_wi_state->iw_table[i] =
1; } new_wi_state->iw_table[node_attr->nid] = weight; new_wi_state->mode_auto = false; rcu_assign_pointer(wi_state, new_wi_state); mutex_unlock(&wi_state_lock); if (old_wi_state) { synchronize_rcu(); kfree(old_wi_state); } return count; } static ssize_t weighted_interleave_auto_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct weighted_interleave_state *state; bool wi_auto = true; rcu_read_lock(); state = rcu_dereference(wi_state); if (state) wi_auto = state->mode_auto; rcu_read_unlock(); return sysfs_emit(buf, "%s\n", str_true_false(wi_auto)); } static ssize_t weighted_interleave_auto_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL; unsigned int *bw; bool input; int i; if (kstrtobool(buf, &input)) return -EINVAL; new_wi_state = kzalloc(struct_size(new_wi_state, iw_table, nr_node_ids), GFP_KERNEL); if (!new_wi_state) return -ENOMEM; for (i = 0; i < nr_node_ids; i++) new_wi_state->iw_table[i] = 1; mutex_lock(&wi_state_lock); if (!input) { old_wi_state = rcu_dereference_protected(wi_state, lockdep_is_held(&wi_state_lock)); if (!old_wi_state) goto update_wi_state; if (input == old_wi_state->mode_auto) { mutex_unlock(&wi_state_lock); return count; } memcpy(new_wi_state->iw_table, old_wi_state->iw_table, nr_node_ids * sizeof(u8)); goto update_wi_state; } bw = node_bw_table; if (!bw) { mutex_unlock(&wi_state_lock); kfree(new_wi_state); return -ENODEV; } new_wi_state->mode_auto = true; reduce_interleave_weights(bw, new_wi_state->iw_table); update_wi_state: rcu_assign_pointer(wi_state, new_wi_state); mutex_unlock(&wi_state_lock); if (old_wi_state) { synchronize_rcu(); kfree(old_wi_state); } return count; } static void sysfs_wi_node_delete(int nid) { struct iw_node_attr *attr; if (nid < 0 || nid >= nr_node_ids) return; mutex_lock(&wi_group->kobj_lock); attr = wi_group->nattrs[nid]; if (!attr) { mutex_unlock(&wi_group->kobj_lock); return; } wi_group->nattrs[nid] = NULL; mutex_unlock(&wi_group->kobj_lock); sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); kfree(attr->kobj_attr.attr.name); kfree(attr); } static void sysfs_wi_node_delete_all(void) { int nid; for (nid = 0; nid < nr_node_ids; nid++) sysfs_wi_node_delete(nid); } static void wi_state_free(void) { struct weighted_interleave_state *old_wi_state; mutex_lock(&wi_state_lock); old_wi_state = rcu_dereference_protected(wi_state, lockdep_is_held(&wi_state_lock)); rcu_assign_pointer(wi_state, NULL); mutex_unlock(&wi_state_lock); if (old_wi_state) { synchronize_rcu(); kfree(old_wi_state); } } static struct kobj_attribute wi_auto_attr = __ATTR(auto, 0664, weighted_interleave_auto_show, weighted_interleave_auto_store); static void wi_cleanup(void) { sysfs_remove_file(&wi_group->wi_kobj, &wi_auto_attr.attr); sysfs_wi_node_delete_all(); wi_state_free(); } static void wi_kobj_release(struct kobject *wi_kobj) { kfree(wi_group); } static const struct kobj_type wi_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = wi_kobj_release, }; static int sysfs_wi_node_add(int nid) { int ret; char *name; struct iw_node_attr *new_attr; if (nid < 0 || nid >= nr_node_ids) { pr_err("invalid node id: %d\n", nid); return -EINVAL; } new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL); if (!new_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { kfree(new_attr); return -ENOMEM; } sysfs_attr_init(&new_attr->kobj_attr.attr); new_attr->kobj_attr.attr.name = name; new_attr->kobj_attr.attr.mode = 0644; 
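/* The attribute created here is expected to surface as /sys/kernel/mm/mempolicy/weighted_interleave/node<N>; the "mempolicy" kobject itself is created under mm_kobj in mempolicy_sysfs_init() below. */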
new_attr->kobj_attr.show = node_show; new_attr->kobj_attr.store = node_store; new_attr->nid = nid; mutex_lock(&wi_group->kobj_lock); if (wi_group->nattrs[nid]) { mutex_unlock(&wi_group->kobj_lock); ret = -EEXIST; goto out; } ret = sysfs_create_file(&wi_group->wi_kobj, &new_attr->kobj_attr.attr); if (ret) { mutex_unlock(&wi_group->kobj_lock); goto out; } wi_group->nattrs[nid] = new_attr; mutex_unlock(&wi_group->kobj_lock); return 0; out: kfree(new_attr->kobj_attr.attr.name); kfree(new_attr); return ret; } static int wi_node_notifier(struct notifier_block *nb, unsigned long action, void *data) { int err; struct node_notify *nn = data; int nid = nn->nid; switch (action) { case NODE_ADDED_FIRST_MEMORY: err = sysfs_wi_node_add(nid); if (err) pr_err("failed to add sysfs for node%d during hotplug: %d\n", nid, err); break; case NODE_REMOVED_LAST_MEMORY: sysfs_wi_node_delete(nid); break; } return NOTIFY_OK; } static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj) { int nid, err; wi_group = kzalloc(struct_size(wi_group, nattrs, nr_node_ids), GFP_KERNEL); if (!wi_group) return -ENOMEM; mutex_init(&wi_group->kobj_lock); err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj, "weighted_interleave"); if (err) goto err_put_kobj; err = sysfs_create_file(&wi_group->wi_kobj, &wi_auto_attr.attr); if (err) goto err_put_kobj; for_each_online_node(nid) { if (!node_state(nid, N_MEMORY)) continue; err = sysfs_wi_node_add(nid); if (err) { pr_err("failed to add sysfs for node%d during init: %d\n", nid, err); goto err_cleanup_kobj; } } hotplug_node_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); return 0; err_cleanup_kobj: wi_cleanup(); kobject_del(&wi_group->wi_kobj); err_put_kobj: kobject_put(&wi_group->wi_kobj); return err; } static int __init mempolicy_sysfs_init(void) { int err; static struct kobject *mempolicy_kobj; mempolicy_kobj = kobject_create_and_add("mempolicy", mm_kobj); if (!mempolicy_kobj) return -ENOMEM; err = add_weighted_interleave_group(mempolicy_kobj); if (err) goto err_kobj; return 0; err_kobj: kobject_del(mempolicy_kobj); kobject_put(mempolicy_kobj); return err; } late_initcall(mempolicy_sysfs_init); #endif /* CONFIG_SYSFS */
// SPDX-License-Identifier: GPL-2.0 /* * Media device request objects * * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2018 Intel Corporation * Copyright (C) 2018 Google, Inc. * * Author: Hans Verkuil <hverkuil@kernel.org> * Author: Sakari Ailus <sakari.ailus@linux.intel.com> */ #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/refcount.h> #include <media/media-device.h> #include <media/media-request.h> static const char * const request_state[] = { [MEDIA_REQUEST_STATE_IDLE] = "idle", [MEDIA_REQUEST_STATE_VALIDATING] = "validating", [MEDIA_REQUEST_STATE_QUEUED] = "queued", [MEDIA_REQUEST_STATE_COMPLETE] = "complete", [MEDIA_REQUEST_STATE_CLEANING] = "cleaning", [MEDIA_REQUEST_STATE_UPDATING] = "updating", }; static const char * media_request_state_str(enum media_request_state state) { BUILD_BUG_ON(ARRAY_SIZE(request_state) != NR_OF_MEDIA_REQUEST_STATE); if (WARN_ON(state >= ARRAY_SIZE(request_state))) return "invalid"; return request_state[state]; } static void media_request_clean(struct media_request *req) { struct media_request_object *obj, *obj_safe; /* Just a sanity check. No other code path is allowed to change this.
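 * (Both callers, media_request_release() and media_request_ioctl_reinit(), move the request to MEDIA_REQUEST_STATE_CLEANING before calling here.)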
*/ WARN_ON(req->state != MEDIA_REQUEST_STATE_CLEANING); WARN_ON(req->updating_count); WARN_ON(req->access_count); list_for_each_entry_safe(obj, obj_safe, &req->objects, list) { media_request_object_unbind(obj); media_request_object_put(obj); } req->updating_count = 0; req->access_count = 0; WARN_ON(req->num_incomplete_objects); req->num_incomplete_objects = 0; wake_up_interruptible_all(&req->poll_wait); } static void media_request_release(struct kref *kref) { struct media_request *req = container_of(kref, struct media_request, kref); struct media_device *mdev = req->mdev; dev_dbg(mdev->dev, "request: release %s\n", req->debug_str); /* No other users, no need for a spinlock */ req->state = MEDIA_REQUEST_STATE_CLEANING; media_request_clean(req); if (mdev->ops->req_free) mdev->ops->req_free(req); else kfree(req); } void media_request_put(struct media_request *req) { kref_put(&req->kref, media_request_release); } EXPORT_SYMBOL_GPL(media_request_put); static int media_request_close(struct inode *inode, struct file *filp) { struct media_request *req = filp->private_data; media_request_put(req); return 0; } static __poll_t media_request_poll(struct file *filp, struct poll_table_struct *wait) { struct media_request *req = filp->private_data; unsigned long flags; __poll_t ret = 0; if (!(poll_requested_events(wait) & EPOLLPRI)) return 0; poll_wait(filp, &req->poll_wait, wait); spin_lock_irqsave(&req->lock, flags); if (req->state == MEDIA_REQUEST_STATE_COMPLETE) { ret = EPOLLPRI; goto unlock; } if (req->state != MEDIA_REQUEST_STATE_QUEUED) { ret = EPOLLERR; goto unlock; } unlock: spin_unlock_irqrestore(&req->lock, flags); return ret; } static long media_request_ioctl_queue(struct media_request *req) { struct media_device *mdev = req->mdev; enum media_request_state state; unsigned long flags; int ret; dev_dbg(mdev->dev, "request: queue %s\n", req->debug_str); /* * Ensure the request that is validated will be the one that gets queued * next by serialising the queueing process. This mutex is also used * to serialize with canceling a vb2 queue and with setting values such * as controls in a request. */ mutex_lock(&mdev->req_queue_mutex); media_request_get(req); spin_lock_irqsave(&req->lock, flags); if (req->state == MEDIA_REQUEST_STATE_IDLE) req->state = MEDIA_REQUEST_STATE_VALIDATING; state = req->state; spin_unlock_irqrestore(&req->lock, flags); if (state != MEDIA_REQUEST_STATE_VALIDATING) { dev_dbg(mdev->dev, "request: unable to queue %s, request in state %s\n", req->debug_str, media_request_state_str(state)); media_request_put(req); mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } ret = mdev->ops->req_validate(req); /* * If the req_validate was successful, then we mark the state as QUEUED * and call req_queue. The reason we set the state first is that this * allows req_queue to unbind or complete the queued objects in case * they are immediately 'consumed'. State changes from QUEUED to another * state can only happen if either the driver changes the state or if * the user cancels the vb2 queue. The driver can only change the state * after each object is queued through the req_queue op (and note that * that op cannot fail), so setting the state to QUEUED up front is * safe. * * The other reason for changing the state is if the vb2 queue is * canceled, and that uses the req_queue_mutex which is still locked * while req_queue is called, so that's safe as well. */ spin_lock_irqsave(&req->lock, flags); req->state = ret ? 
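/* on validation failure fall back to IDLE so userspace can fix up the request and retry; on success mark it QUEUED before req_queue runs, for the reasons spelled out above */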
MEDIA_REQUEST_STATE_IDLE : MEDIA_REQUEST_STATE_QUEUED; spin_unlock_irqrestore(&req->lock, flags); if (!ret) mdev->ops->req_queue(req); mutex_unlock(&mdev->req_queue_mutex); if (ret) { dev_dbg(mdev->dev, "request: can't queue %s (%d)\n", req->debug_str, ret); media_request_put(req); } return ret; } static long media_request_ioctl_reinit(struct media_request *req) { struct media_device *mdev = req->mdev; unsigned long flags; spin_lock_irqsave(&req->lock, flags); if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_COMPLETE) { dev_dbg(mdev->dev, "request: %s not in idle or complete state, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); return -EBUSY; } if (req->access_count) { dev_dbg(mdev->dev, "request: %s is being accessed, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); return -EBUSY; } req->state = MEDIA_REQUEST_STATE_CLEANING; spin_unlock_irqrestore(&req->lock, flags); media_request_clean(req); spin_lock_irqsave(&req->lock, flags); req->state = MEDIA_REQUEST_STATE_IDLE; spin_unlock_irqrestore(&req->lock, flags); return 0; } static long media_request_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_request *req = filp->private_data; switch (cmd) { case MEDIA_REQUEST_IOC_QUEUE: return media_request_ioctl_queue(req); case MEDIA_REQUEST_IOC_REINIT: return media_request_ioctl_reinit(req); default: return -ENOIOCTLCMD; } } static const struct file_operations request_fops = { .owner = THIS_MODULE, .poll = media_request_poll, .unlocked_ioctl = media_request_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = media_request_ioctl, #endif /* CONFIG_COMPAT */ .release = media_request_close, }; struct media_request * media_request_get_by_fd(struct media_device *mdev, int request_fd) { struct media_request *req; if (!mdev || !mdev->ops || !mdev->ops->req_validate || !mdev->ops->req_queue) return ERR_PTR(-EBADR); CLASS(fd, f)(request_fd); if (fd_empty(f)) goto err; if (fd_file(f)->f_op != &request_fops) goto err; req = fd_file(f)->private_data; if (req->mdev != mdev) goto err; /* * Note: as long as someone has an open filehandle of the request, * the request can never be released. The fdget() above ensures that * even if userspace closes the request filehandle, the release() * fop won't be called, so the media_request_get() always succeeds * and there is no race condition where the request was released * before media_request_get() is called. 
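 * (The reference taken below becomes the caller's responsibility: every successful media_request_get_by_fd() must be balanced by a media_request_put().)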
*/ media_request_get(req); return req; err: dev_dbg(mdev->dev, "cannot find request_fd %d\n", request_fd); return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(media_request_get_by_fd); int media_request_alloc(struct media_device *mdev, int *alloc_fd) { struct media_request *req; struct file *filp; int fd; int ret; /* Either both are NULL or both are non-NULL */ if (WARN_ON(!mdev->ops->req_alloc ^ !mdev->ops->req_free)) return -ENOMEM; if (mdev->ops->req_alloc) req = mdev->ops->req_alloc(mdev); else req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { ret = fd; goto err_free_req; } filp = anon_inode_getfile("request", &request_fops, NULL, O_CLOEXEC); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_put_fd; } filp->private_data = req; req->mdev = mdev; req->state = MEDIA_REQUEST_STATE_IDLE; req->num_incomplete_objects = 0; kref_init(&req->kref); INIT_LIST_HEAD(&req->objects); spin_lock_init(&req->lock); init_waitqueue_head(&req->poll_wait); req->updating_count = 0; req->access_count = 0; *alloc_fd = fd; snprintf(req->debug_str, sizeof(req->debug_str), "%u:%d", atomic_inc_return(&mdev->request_id), fd); dev_dbg(mdev->dev, "request: allocated %s\n", req->debug_str); fd_install(fd, filp); return 0; err_put_fd: put_unused_fd(fd); err_free_req: if (mdev->ops->req_free) mdev->ops->req_free(req); else kfree(req); return ret; } static void media_request_object_release(struct kref *kref) { struct media_request_object *obj = container_of(kref, struct media_request_object, kref); struct media_request *req = obj->req; if (WARN_ON(req)) media_request_object_unbind(obj); obj->ops->release(obj); } struct media_request_object * media_request_object_find(struct media_request *req, const struct media_request_object_ops *ops, void *priv) { struct media_request_object *obj; struct media_request_object *found = NULL; unsigned long flags; if (WARN_ON(!ops || !priv)) return NULL; spin_lock_irqsave(&req->lock, flags); list_for_each_entry(obj, &req->objects, list) { if (obj->ops == ops && obj->priv == priv) { media_request_object_get(obj); found = obj; break; } } spin_unlock_irqrestore(&req->lock, flags); return found; } EXPORT_SYMBOL_GPL(media_request_object_find); void media_request_object_put(struct media_request_object *obj) { kref_put(&obj->kref, media_request_object_release); } EXPORT_SYMBOL_GPL(media_request_object_put); void media_request_object_init(struct media_request_object *obj) { obj->ops = NULL; obj->req = NULL; obj->priv = NULL; obj->completed = false; INIT_LIST_HEAD(&obj->list); kref_init(&obj->kref); } EXPORT_SYMBOL_GPL(media_request_object_init); int media_request_object_bind(struct media_request *req, const struct media_request_object_ops *ops, void *priv, bool is_buffer, struct media_request_object *obj) { unsigned long flags; int ret = -EBUSY; if (WARN_ON(!ops->release)) return -EBADR; spin_lock_irqsave(&req->lock, flags); if (WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING && req->state != MEDIA_REQUEST_STATE_QUEUED)) goto unlock; obj->req = req; obj->ops = ops; obj->priv = priv; if (is_buffer) list_add_tail(&obj->list, &req->objects); else list_add(&obj->list, &req->objects); req->num_incomplete_objects++; ret = 0; unlock: spin_unlock_irqrestore(&req->lock, flags); return ret; } EXPORT_SYMBOL_GPL(media_request_object_bind); void media_request_object_unbind(struct media_request_object *obj) { struct media_request *req = obj->req; unsigned long flags; bool completed = false; if (WARN_ON(!req)) return; spin_lock_irqsave(&req->lock, 
flags); list_del(&obj->list); obj->req = NULL; if (req->state == MEDIA_REQUEST_STATE_COMPLETE) goto unlock; if (WARN_ON(req->state == MEDIA_REQUEST_STATE_VALIDATING)) goto unlock; if (req->state == MEDIA_REQUEST_STATE_CLEANING) { if (!obj->completed) req->num_incomplete_objects--; goto unlock; } if (WARN_ON(!req->num_incomplete_objects)) goto unlock; req->num_incomplete_objects--; if (req->state == MEDIA_REQUEST_STATE_QUEUED && !req->num_incomplete_objects) { req->state = MEDIA_REQUEST_STATE_COMPLETE; completed = true; wake_up_interruptible_all(&req->poll_wait); } unlock: spin_unlock_irqrestore(&req->lock, flags); if (obj->ops->unbind) obj->ops->unbind(obj); if (completed) media_request_put(req); } EXPORT_SYMBOL_GPL(media_request_object_unbind); void media_request_object_complete(struct media_request_object *obj) { struct media_request *req = obj->req; unsigned long flags; bool completed = false; spin_lock_irqsave(&req->lock, flags); if (obj->completed) goto unlock; obj->completed = true; if (WARN_ON(!req->num_incomplete_objects) || WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED)) goto unlock; if (!--req->num_incomplete_objects) { req->state = MEDIA_REQUEST_STATE_COMPLETE; wake_up_interruptible_all(&req->poll_wait); completed = true; } unlock: spin_unlock_irqrestore(&req->lock, flags); if (completed) media_request_put(req); } EXPORT_SYMBOL_GPL(media_request_object_complete);
// SPDX-License-Identifier: GPL-2.0-or-later /* Userspace key control operations * * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/key.h> #include <linux/keyctl.h> #include <linux/fs.h> #include <linux/capability.h> #include <linux/cred.h> #include <linux/string.h> #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/security.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <keys/request_key_auth-type.h> #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 static const unsigned char keyrings_capabilities[2] = { [0] = (KEYCTL_CAPS0_CAPABILITIES | (IS_ENABLED(CONFIG_PERSISTENT_KEYRINGS) ? KEYCTL_CAPS0_PERSISTENT_KEYRINGS : 0) | (IS_ENABLED(CONFIG_KEY_DH_OPERATIONS) ? KEYCTL_CAPS0_DIFFIE_HELLMAN : 0) | (IS_ENABLED(CONFIG_ASYMMETRIC_KEY_TYPE) ? KEYCTL_CAPS0_PUBLIC_KEY : 0) | (IS_ENABLED(CONFIG_BIG_KEYS) ?
KEYCTL_CAPS0_BIG_KEY : 0) | KEYCTL_CAPS0_INVALIDATE | KEYCTL_CAPS0_RESTRICT_KEYRING | KEYCTL_CAPS0_MOVE ), [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME | KEYCTL_CAPS1_NS_KEY_TAG | (IS_ENABLED(CONFIG_KEY_NOTIFICATIONS) ? KEYCTL_CAPS1_NOTIFICATIONS : 0) ), }; static int key_get_type_from_user(char *type, const char __user *_type, unsigned len) { int ret; ret = strncpy_from_user(type, _type, len); if (ret < 0) return ret; if (ret == 0 || ret >= len) return -EINVAL; if (type[0] == '.') return -EPERM; type[len - 1] = '\0'; return 0; } /* * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * * If the description is NULL or an empty string, the key type is asked to * generate one from the payload. * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error * code is returned. */ SYSCALL_DEFINE5(add_key, const char __user *, _type, const char __user *, _description, const void __user *, _payload, size_t, plen, key_serial_t, ringid) { key_ref_t keyring_ref, key_ref; char type[32], *description; void *payload; long ret; ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; /* draw all the data into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; description = NULL; if (_description) { description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } if (!*description) { kfree(description); description = NULL; } else if ((description[0] == '.') && (strncmp(type, "keyring", 7) == 0)) { ret = -EPERM; goto error2; } } /* pull the payload in if one was supplied */ payload = NULL; if (plen) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error2; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) goto error3; } /* find the target keyring (which must be writable) */ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error3; } /* create or update the requested key and add it to the target * keyring */ key_ref = key_create_or_update(keyring_ref, type, description, payload, plen, KEY_PERM_UNDEF, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key_ref)) { ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); } else { ret = PTR_ERR(key_ref); } key_ref_put(keyring_ref); error3: kvfree_sensitive(payload, plen); error2: kfree(description); error: return ret; } /* * Search the process keyrings and keyring trees linked from those for a * matching key. Keyrings must have appropriate Search permission to be * searched. * * If a key is found, it will be attached to the destination keyring if there's * one specified and the serial number of the key will be returned. * * If no key is found, /sbin/request-key will be invoked if _callout_info is * non-NULL in an attempt to create a key. The _callout_info string will be * passed to /sbin/request-key to aid with completing the request. If the * _callout_info string is "" then it will be changed to "-". 
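 * (Example caller, assuming the userspace keyutils wrapper: request_key("user", "foo", NULL, KEY_SPEC_SESSION_KEYRING) ends up in this syscall.)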
*/ SYSCALL_DEFINE4(request_key, const char __user *, _type, const char __user *, _description, const char __user *, _callout_info, key_serial_t, destringid) { struct key_type *ktype; struct key *key; key_ref_t dest_ref; size_t callout_len; char type[32], *description, *callout_info; long ret; /* pull the type into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; /* pull the description into kernel space */ description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } /* pull the callout info into kernel space */ callout_info = NULL; callout_len = 0; if (_callout_info) { callout_info = strndup_user(_callout_info, PAGE_SIZE); if (IS_ERR(callout_info)) { ret = PTR_ERR(callout_info); goto error2; } callout_len = strlen(callout_info); } /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; } } /* find the key type */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { ret = PTR_ERR(ktype); goto error4; } /* do the search */ key = request_key_and_link(ktype, description, NULL, callout_info, callout_len, NULL, key_ref_to_ptr(dest_ref), KEY_ALLOC_IN_QUOTA); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error5; } /* wait for the key to finish being constructed */ ret = wait_for_key_construction(key, 1); if (ret < 0) goto error6; ret = key->serial; error6: key_put(key); error5: key_type_put(ktype); error4: key_ref_put(dest_ref); error3: kfree(callout_info); error2: kfree(description); error: return ret; } /* * Get the ID of the specified process keyring. * * The requested keyring must have search permission to be found. * * If successful, the ID of the requested keyring will be returned. */ long keyctl_get_keyring_ID(key_serial_t id, int create) { key_ref_t key_ref; unsigned long lflags; long ret; lflags = create ? KEY_LOOKUP_CREATE : 0; key_ref = lookup_user_key(id, lflags, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); error: return ret; } /* * Join a (named) session keyring. * * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will * be skipped over. It is not permitted for userspace to create or join * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ long keyctl_join_session_keyring(const char __user *_name) { char *name; long ret; /* fetch the name from userspace */ name = NULL; if (_name) { name = strndup_user(_name, KEY_MAX_DESC_SIZE); if (IS_ERR(name)) { ret = PTR_ERR(name); goto error; } ret = -EPERM; if (name[0] == '.') goto error_name; } /* join the session */ ret = join_session_keyring(name); error_name: kfree(name); error: return ret; } /* * Update a key's data payload from the given data. * * The key must grant the caller Write permission and the key type must support * updating for this to work. A negative key can be positively instantiated * with this call. * * If successful, 0 will be returned. If the key type does not support * updating, then -EOPNOTSUPP will be returned. 
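 * (Example, via the keyutils wrapper keyctl_update(key, payload, plen): "user" keys support update, whereas e.g. "keyring" keys do not and yield -EOPNOTSUPP.)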
 */
long keyctl_update_key(key_serial_t id,
		       const void __user *_payload,
		       size_t plen)
{
	key_ref_t key_ref;
	void *payload;
	long ret;

	ret = -EINVAL;
	if (plen > PAGE_SIZE)
		goto error;

	/* pull the payload in if one was supplied */
	payload = NULL;
	if (plen) {
		ret = -ENOMEM;
		payload = kvmalloc(plen, GFP_KERNEL);
		if (!payload)
			goto error;

		ret = -EFAULT;
		if (copy_from_user(payload, _payload, plen) != 0)
			goto error2;
	}

	/* find the target key (which must be writable) */
	key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		goto error2;
	}

	/* update the key */
	ret = key_update(key_ref, payload, plen);

	key_ref_put(key_ref);
error2:
	kvfree_sensitive(payload, plen);
error:
	return ret;
}

/*
 * Revoke a key.
 *
 * The key must grant the caller Write or Setattr permission for this to
 * work.  The key type should give up its quota claim when revoked.  The key
 * and any links to the key will be automatically garbage collected after a
 * certain amount of time (/proc/sys/kernel/keys/gc_delay).
 *
 * Keys with KEY_FLAG_KEEP set should not be revoked.
 *
 * If successful, 0 is returned.
 */
long keyctl_revoke_key(key_serial_t id)
{
	key_ref_t key_ref;
	struct key *key;
	long ret;

	key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		if (ret != -EACCES)
			goto error;
		key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
		if (IS_ERR(key_ref)) {
			ret = PTR_ERR(key_ref);
			goto error;
		}
	}

	key = key_ref_to_ptr(key_ref);
	ret = 0;
	if (test_bit(KEY_FLAG_KEEP, &key->flags))
		ret = -EPERM;
	else
		key_revoke(key);

	key_ref_put(key_ref);
error:
	return ret;
}

/*
 * Invalidate a key.
 *
 * The key must grant the caller Invalidate permission for this to work.
 * The key and any links to the key will be automatically garbage collected
 * immediately.
 *
 * Keys with KEY_FLAG_KEEP set should not be invalidated.
 *
 * If successful, 0 is returned.
 */
long keyctl_invalidate_key(key_serial_t id)
{
	key_ref_t key_ref;
	struct key *key;
	long ret;

	kenter("%d", id);

	key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);

		/* Root is permitted to invalidate certain special keys */
		if (capable(CAP_SYS_ADMIN)) {
			key_ref = lookup_user_key(id, 0, KEY_SYSADMIN_OVERRIDE);
			if (IS_ERR(key_ref))
				goto error;
			if (test_bit(KEY_FLAG_ROOT_CAN_INVAL,
				     &key_ref_to_ptr(key_ref)->flags))
				goto invalidate;
			goto error_put;
		}

		goto error;
	}

invalidate:
	key = key_ref_to_ptr(key_ref);
	ret = 0;
	if (test_bit(KEY_FLAG_KEEP, &key->flags))
		ret = -EPERM;
	else
		key_invalidate(key);
error_put:
	key_ref_put(key_ref);
error:
	kleave(" = %ld", ret);
	return ret;
}

/*
 * Clear the specified keyring, creating an empty process keyring if one of the
 * special keyring IDs is used.
 *
 * The keyring must grant the caller Write permission and not have
 * KEY_FLAG_KEEP set for this to work.  If successful, 0 will be returned.
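 *
 * Illustrative userspace sketch (added commentary; assumes the keyutils
 * wrapper, and the choice of the session keyring is an arbitrary example):
 *
 *	#include <keyutils.h>
 *
 *	if (keyctl_clear(KEY_SPEC_SESSION_KEYRING) < 0)
 *		perror("keyctl_clear");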
 */
long keyctl_keyring_clear(key_serial_t ringid)
{
	key_ref_t keyring_ref;
	struct key *keyring;
	long ret;

	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
	if (IS_ERR(keyring_ref)) {
		ret = PTR_ERR(keyring_ref);

		/* Root is permitted to invalidate certain special keyrings */
		if (capable(CAP_SYS_ADMIN)) {
			keyring_ref = lookup_user_key(ringid, 0,
						      KEY_SYSADMIN_OVERRIDE);
			if (IS_ERR(keyring_ref))
				goto error;
			if (test_bit(KEY_FLAG_ROOT_CAN_CLEAR,
				     &key_ref_to_ptr(keyring_ref)->flags))
				goto clear;
			goto error_put;
		}

		goto error;
	}

clear:
	keyring = key_ref_to_ptr(keyring_ref);
	if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
		ret = -EPERM;
	else
		ret = keyring_clear(keyring);
error_put:
	key_ref_put(keyring_ref);
error:
	return ret;
}

/*
 * Create a link from a keyring to a key if there's no matching key in the
 * keyring, otherwise replace the link to the matching key with a link to the
 * new key.
 *
 * The key must grant the caller Link permission and the keyring must grant
 * the caller Write permission.  Furthermore, if an additional link is created,
 * the keyring's quota will be extended.
 *
 * If successful, 0 will be returned.
 */
long keyctl_keyring_link(key_serial_t id, key_serial_t ringid)
{
	key_ref_t keyring_ref, key_ref;
	long ret;

	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
	if (IS_ERR(keyring_ref)) {
		ret = PTR_ERR(keyring_ref);
		goto error;
	}

	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		goto error2;
	}

	ret = key_link(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref));

	key_ref_put(key_ref);
error2:
	key_ref_put(keyring_ref);
error:
	return ret;
}

/*
 * Unlink a key from a keyring.
 *
 * The keyring must grant the caller Write permission for this to work; the key
 * itself need not grant the caller anything.  If the last link to a key is
 * removed then that key will be scheduled for destruction.
 *
 * Keys or keyrings with KEY_FLAG_KEEP set should not be unlinked.
 *
 * If successful, 0 will be returned.
 */
long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid)
{
	key_ref_t keyring_ref, key_ref;
	struct key *keyring, *key;
	long ret;

	keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_WRITE);
	if (IS_ERR(keyring_ref)) {
		ret = PTR_ERR(keyring_ref);
		goto error;
	}

	key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, KEY_NEED_UNLINK);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		goto error2;
	}

	keyring = key_ref_to_ptr(keyring_ref);
	key = key_ref_to_ptr(key_ref);
	if (test_bit(KEY_FLAG_KEEP, &keyring->flags) &&
	    test_bit(KEY_FLAG_KEEP, &key->flags))
		ret = -EPERM;
	else
		ret = key_unlink(keyring, key);

	key_ref_put(key_ref);
error2:
	key_ref_put(keyring_ref);
error:
	return ret;
}

/*
 * Move a link to a key from one keyring to another, displacing any matching
 * key from the destination keyring.
 *
 * The key must grant the caller Link permission and both keyrings must grant
 * the caller Write permission.  There must also be a link in the from keyring
 * to the key.  If both keyrings are the same, nothing is done.
 *
 * If successful, 0 will be returned.
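 *
 * A hedged userspace sketch (added commentary; it assumes the keyutils
 * keyctl_move() wrapper exists on the system, the keyring IDs are arbitrary
 * examples, and KEYCTL_MOVE_EXCL makes the move fail with EEXIST rather
 * than displacing a matching key):
 *
 *	#include <keyutils.h>
 *
 *	if (keyctl_move(id, KEY_SPEC_SESSION_KEYRING,
 *			KEY_SPEC_PROCESS_KEYRING, KEYCTL_MOVE_EXCL) < 0)
 *		perror("keyctl_move");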
 */
long keyctl_keyring_move(key_serial_t id, key_serial_t from_ringid,
			 key_serial_t to_ringid, unsigned int flags)
{
	key_ref_t key_ref, from_ref, to_ref;
	long ret;

	if (flags & ~KEYCTL_MOVE_EXCL)
		return -EINVAL;

	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK);
	if (IS_ERR(key_ref))
		return PTR_ERR(key_ref);

	from_ref = lookup_user_key(from_ringid, 0, KEY_NEED_WRITE);
	if (IS_ERR(from_ref)) {
		ret = PTR_ERR(from_ref);
		goto error2;
	}

	to_ref = lookup_user_key(to_ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
	if (IS_ERR(to_ref)) {
		ret = PTR_ERR(to_ref);
		goto error3;
	}

	ret = key_move(key_ref_to_ptr(key_ref), key_ref_to_ptr(from_ref),
		       key_ref_to_ptr(to_ref), flags);

	key_ref_put(to_ref);
error3:
	key_ref_put(from_ref);
error2:
	key_ref_put(key_ref);
	return ret;
}

/*
 * Return a description of a key to userspace.
 *
 * The key must grant the caller View permission for this to work.
 *
 * If there's a buffer, we place up to buflen bytes of data into it formatted
 * in the following way:
 *
 *	type;uid;gid;perm;description<NUL>
 *
 * If successful, we return the amount of description available, irrespective
 * of how much we may have copied into the buffer.
 */
long keyctl_describe_key(key_serial_t keyid,
			 char __user *buffer,
			 size_t buflen)
{
	struct key *key, *instkey;
	key_ref_t key_ref;
	char *infobuf;
	long ret;
	int desclen, infolen;

	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW);
	if (IS_ERR(key_ref)) {
		/* viewing a key under construction is permitted if we have the
		 * authorisation token handy */
		if (PTR_ERR(key_ref) == -EACCES) {
			instkey = key_get_instantiation_authkey(keyid);
			if (!IS_ERR(instkey)) {
				key_put(instkey);
				key_ref = lookup_user_key(keyid,
							  KEY_LOOKUP_PARTIAL,
							  KEY_AUTHTOKEN_OVERRIDE);
				if (!IS_ERR(key_ref))
					goto okay;
			}
		}

		ret = PTR_ERR(key_ref);
		goto error;
	}

okay:
	key = key_ref_to_ptr(key_ref);
	desclen = strlen(key->description);

	/* calculate how much information we're going to return */
	ret = -ENOMEM;
	infobuf = kasprintf(GFP_KERNEL,
			    "%s;%d;%d;%08x;",
			    key->type->name,
			    from_kuid_munged(current_user_ns(), key->uid),
			    from_kgid_munged(current_user_ns(), key->gid),
			    key->perm);
	if (!infobuf)
		goto error2;
	infolen = strlen(infobuf);
	ret = infolen + desclen + 1;

	/* consider returning the data */
	if (buffer && buflen >= ret) {
		if (copy_to_user(buffer, infobuf, infolen) != 0 ||
		    copy_to_user(buffer + infolen, key->description,
				 desclen + 1) != 0)
			ret = -EFAULT;
	}

	kfree(infobuf);
error2:
	key_ref_put(key_ref);
error:
	return ret;
}

/*
 * Search the specified keyring and any keyrings it links to for a matching
 * key.  Only keyrings that grant the caller Search permission will be searched
 * (this includes the starting keyring).  Only keys with Search permission can
 * be found.
 *
 * If successful, the found key will be linked to the destination keyring if
 * supplied and the key has Link permission, and the found key ID will be
 * returned.
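 *
 * Illustrative sketch (added commentary; assumes the keyutils wrapper, the
 * type and description strings are arbitrary examples, and passing 0 as the
 * destination keyring requests no link):
 *
 *	#include <keyutils.h>
 *
 *	long found = keyctl_search(KEY_SPEC_SESSION_KEYRING,
 *				   "user", "mykey", 0);
 *	if (found == -1)
 *		perror("keyctl_search");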
 */
long keyctl_keyring_search(key_serial_t ringid,
			   const char __user *_type,
			   const char __user *_description,
			   key_serial_t destringid)
{
	struct key_type *ktype;
	key_ref_t keyring_ref, key_ref, dest_ref;
	char type[32], *description;
	long ret;

	/* pull the type and description into kernel space */
	ret = key_get_type_from_user(type, _type, sizeof(type));
	if (ret < 0)
		goto error;

	description = strndup_user(_description, KEY_MAX_DESC_SIZE);
	if (IS_ERR(description)) {
		ret = PTR_ERR(description);
		goto error;
	}

	/* get the keyring at which to begin the search */
	keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_SEARCH);
	if (IS_ERR(keyring_ref)) {
		ret = PTR_ERR(keyring_ref);
		goto error2;
	}

	/* get the destination keyring if specified */
	dest_ref = NULL;
	if (destringid) {
		dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
					   KEY_NEED_WRITE);
		if (IS_ERR(dest_ref)) {
			ret = PTR_ERR(dest_ref);
			goto error3;
		}
	}

	/* find the key type */
	ktype = key_type_lookup(type);
	if (IS_ERR(ktype)) {
		ret = PTR_ERR(ktype);
		goto error4;
	}

	/* do the search */
	key_ref = keyring_search(keyring_ref, ktype, description, true);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);

		/* treat lack or presence of a negative key the same */
		if (ret == -EAGAIN)
			ret = -ENOKEY;
		goto error5;
	}

	/* link the resulting key to the destination keyring if we can */
	if (dest_ref) {
		ret = key_permission(key_ref, KEY_NEED_LINK);
		if (ret < 0)
			goto error6;

		ret = key_link(key_ref_to_ptr(dest_ref), key_ref_to_ptr(key_ref));
		if (ret < 0)
			goto error6;
	}

	ret = key_ref_to_ptr(key_ref)->serial;

error6:
	key_ref_put(key_ref);
error5:
	key_type_put(ktype);
error4:
	key_ref_put(dest_ref);
error3:
	key_ref_put(keyring_ref);
error2:
	kfree(description);
error:
	return ret;
}

/*
 * Call the read method
 */
static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen)
{
	long ret;

	down_read(&key->sem);
	ret = key_validate(key);
	if (ret == 0)
		ret = key->type->read(key, buffer, buflen);
	up_read(&key->sem);
	return ret;
}

/*
 * Read a key's payload.
 *
 * The key must either grant the caller Read permission, or it must grant the
 * caller Search permission when searched for from the process keyrings.
 *
 * If successful, we place up to buflen bytes of data into the buffer, if one
 * is provided, and return the amount of data that is available in the key,
 * irrespective of how much we copied into the buffer.
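 *
 * Because the available length is returned irrespective of the copy, a
 * userspace caller can size its buffer in two passes.  A hedged sketch
 * (added commentary; assumes the keyutils wrapper, does not guard against
 * the key changing between the two calls, and use_payload() is a
 * hypothetical consumer):
 *
 *	#include <keyutils.h>
 *	#include <stdlib.h>
 *
 *	long len = keyctl_read(id, NULL, 0);	/. query the length only ./
 *	char *buf = len > 0 ? malloc(len) : NULL;
 *	if (buf && keyctl_read(id, buf, len) == len)
 *		use_payload(buf, len);	/. hypothetical consumer ./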
 */
long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
{
	struct key *key;
	key_ref_t key_ref;
	long ret;
	char *key_data = NULL;
	size_t key_data_len;

	/* find the key first */
	key_ref = lookup_user_key(keyid, 0, KEY_DEFER_PERM_CHECK);
	if (IS_ERR(key_ref)) {
		ret = -ENOKEY;
		goto out;
	}

	key = key_ref_to_ptr(key_ref);

	ret = key_read_state(key);
	if (ret < 0)
		goto key_put_out; /* Negatively instantiated */

	/* see if we can read it directly */
	ret = key_permission(key_ref, KEY_NEED_READ);
	if (ret == 0)
		goto can_read_key;
	if (ret != -EACCES)
		goto key_put_out;

	/* we can't; see if it's searchable from this process's keyrings
	 * - we automatically take account of the fact that it may be
	 *   dangling off an instantiation key
	 */
	if (!is_key_possessed(key_ref)) {
		ret = -EACCES;
		goto key_put_out;
	}

	/* the key is probably readable - now try to read it */
can_read_key:
	if (!key->type->read) {
		ret = -EOPNOTSUPP;
		goto key_put_out;
	}

	if (!buffer || !buflen) {
		/* Get the key length from the read method */
		ret = __keyctl_read_key(key, NULL, 0);
		goto key_put_out;
	}

	/*
	 * Read the data with the semaphore held (since we might sleep)
	 * to protect against the key being updated or revoked.
	 *
	 * Allocating a temporary buffer to hold the keys before
	 * transferring them to user buffer to avoid potential
	 * deadlock involving page fault and mmap_lock.
	 *
	 * key_data_len = (buflen <= PAGE_SIZE)
	 *		? buflen : actual length of key data
	 *
	 * This prevents allocating arbitrary large buffer which can
	 * be much larger than the actual key length.  In the latter case,
	 * at least 2 passes of this loop are required.
	 */
	key_data_len = (buflen <= PAGE_SIZE) ? buflen : 0;
	for (;;) {
		if (key_data_len) {
			key_data = kvmalloc(key_data_len, GFP_KERNEL);
			if (!key_data) {
				ret = -ENOMEM;
				goto key_put_out;
			}
		}

		ret = __keyctl_read_key(key, key_data, key_data_len);

		/*
		 * Read methods will just return the required length without
		 * any copying if the provided length isn't large enough.
		 */
		if (ret <= 0 || ret > buflen)
			break;

		/*
		 * The key may change (unlikely) in between 2 consecutive
		 * __keyctl_read_key() calls.  In this case, we reallocate
		 * a larger buffer and redo the key read when
		 * key_data_len < ret <= buflen.
		 */
		if (ret > key_data_len) {
			if (unlikely(key_data))
				kvfree_sensitive(key_data, key_data_len);
			key_data_len = ret;
			continue;	/* Allocate buffer */
		}

		if (copy_to_user(buffer, key_data, ret))
			ret = -EFAULT;
		break;
	}
	kvfree_sensitive(key_data, key_data_len);

key_put_out:
	key_put(key);
out:
	return ret;
}

/*
 * Change the ownership of a key
 *
 * The key must grant the caller Setattr permission for this to work, though
 * the key need not be fully instantiated yet.  For the UID to be changed, or
 * for the GID to be changed to a group the caller is not a member of, the
 * caller must have sysadmin capability.  If either uid or gid is -1 then that
 * attribute is not changed.
 *
 * If the UID is to be changed, the new user must have sufficient quota to
 * accept the key.  The quota deduction will be moved from the old user to
 * the new user should the attribute be changed.
 *
 * If successful, 0 will be returned.
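 *
 * A hedged userspace sketch (added commentary; assumes the keyutils
 * wrapper, the UID value is an arbitrary example, and -1 leaves an
 * attribute unchanged as described above):
 *
 *	#include <keyutils.h>
 *
 *	if (keyctl_chown(id, 1000, -1) < 0)	/. change UID, keep GID ./
 *		perror("keyctl_chown");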
 */
long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
{
	struct key_user *newowner, *zapowner = NULL;
	struct key *key;
	key_ref_t key_ref;
	long ret;
	kuid_t uid;
	kgid_t gid;
	unsigned long flags;

	uid = make_kuid(current_user_ns(), user);
	gid = make_kgid(current_user_ns(), group);
	ret = -EINVAL;
	if ((user != (uid_t) -1) && !uid_valid(uid))
		goto error;
	if ((group != (gid_t) -1) && !gid_valid(gid))
		goto error;

	ret = 0;
	if (user == (uid_t) -1 && group == (gid_t) -1)
		goto error;

	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
				  KEY_NEED_SETATTR);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		goto error;
	}

	key = key_ref_to_ptr(key_ref);

	/* make the changes with the locks held to prevent chown/chown races */
	ret = -EACCES;
	down_write(&key->sem);

	{
		bool is_privileged_op = false;

		/* only the sysadmin can chown a key to some other UID */
		if (user != (uid_t) -1 && !uid_eq(key->uid, uid))
			is_privileged_op = true;

		/* only the sysadmin can set the key's GID to a group other
		 * than one of those that the current process subscribes to */
		if (group != (gid_t) -1 && !gid_eq(gid, key->gid) &&
		    !in_group_p(gid))
			is_privileged_op = true;

		if (is_privileged_op && !capable(CAP_SYS_ADMIN))
			goto error_put;
	}

	/* change the UID */
	if (user != (uid_t) -1 && !uid_eq(uid, key->uid)) {
		ret = -ENOMEM;
		newowner = key_user_lookup(uid);
		if (!newowner)
			goto error_put;

		/* transfer the quota burden to the new user */
		if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) {
			unsigned maxkeys = uid_eq(uid, GLOBAL_ROOT_UID) ?
				key_quota_root_maxkeys : key_quota_maxkeys;
			unsigned maxbytes = uid_eq(uid, GLOBAL_ROOT_UID) ?
				key_quota_root_maxbytes : key_quota_maxbytes;

			spin_lock_irqsave(&newowner->lock, flags);
			if (newowner->qnkeys + 1 > maxkeys ||
			    newowner->qnbytes + key->quotalen > maxbytes ||
			    newowner->qnbytes + key->quotalen <
			    newowner->qnbytes)
				goto quota_overrun;

			newowner->qnkeys++;
			newowner->qnbytes += key->quotalen;
			spin_unlock_irqrestore(&newowner->lock, flags);

			spin_lock_irqsave(&key->user->lock, flags);
			key->user->qnkeys--;
			key->user->qnbytes -= key->quotalen;
			spin_unlock_irqrestore(&key->user->lock, flags);
		}

		atomic_dec(&key->user->nkeys);
		atomic_inc(&newowner->nkeys);

		if (key->state != KEY_IS_UNINSTANTIATED) {
			atomic_dec(&key->user->nikeys);
			atomic_inc(&newowner->nikeys);
		}

		zapowner = key->user;
		key->user = newowner;
		key->uid = uid;
	}

	/* change the GID */
	if (group != (gid_t) -1)
		key->gid = gid;

	notify_key(key, NOTIFY_KEY_SETATTR, 0);
	ret = 0;

error_put:
	up_write(&key->sem);
	key_put(key);
	if (zapowner)
		key_user_put(zapowner);
error:
	return ret;

quota_overrun:
	spin_unlock_irqrestore(&newowner->lock, flags);
	zapowner = newowner;
	ret = -EDQUOT;
	goto error_put;
}

/*
 * Change the permission mask on a key.
 *
 * The key must grant the caller Setattr permission for this to work, though
 * the key need not be fully instantiated yet.  If the caller does not have
 * sysadmin capability, it may only change the permission on keys that it
 * owns.
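 *
 * Illustrative sketch (added commentary; assumes the keyutils wrapper, and
 * the mask shown, granting the possessor everything and the owner view and
 * read only, is an arbitrary example):
 *
 *	#include <keyutils.h>
 *
 *	if (keyctl_setperm(id, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ) < 0)
 *		perror("keyctl_setperm");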
 */
long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
{
	struct key *key;
	key_ref_t key_ref;
	long ret;

	ret = -EINVAL;
	if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
		goto error;

	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
				  KEY_NEED_SETATTR);
	if (IS_ERR(key_ref)) {
		ret = PTR_ERR(key_ref);
		goto error;
	}

	key = key_ref_to_ptr(key_ref);

	/* make the changes with the locks held to prevent chown/chmod races */
	ret = -EACCES;
	down_write(&key->sem);

	/* if we're not the sysadmin, we can only change a key that we own */
	if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) {
		key->perm = perm;
		notify_key(key, NOTIFY_KEY_SETATTR, 0);
		ret = 0;
	}

	up_write(&key->sem);
	key_put(key);
error:
	return ret;
}

/*
 * Get the destination keyring for instantiation and check that the caller has
 * Write permission on it.
 */
static long get_instantiation_keyring(key_serial_t ringid,
				      struct request_key_auth *rka,
				      struct key **_dest_keyring)
{
	key_ref_t dkref;

	*_dest_keyring = NULL;

	/* just return a NULL pointer if we weren't asked to make a link */
	if (ringid == 0)
		return 0;

	/* if a specific keyring is nominated by ID, then use that */
	if (ringid > 0) {
		dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
		if (IS_ERR(dkref))
			return PTR_ERR(dkref);
		*_dest_keyring = key_ref_to_ptr(dkref);
		return 0;
	}

	if (ringid == KEY_SPEC_REQKEY_AUTH_KEY)
		return -EINVAL;

	/* otherwise specify the destination keyring recorded in the
	 * authorisation key (any KEY_SPEC_*_KEYRING) */
	if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) {
		*_dest_keyring = key_get(rka->dest_keyring);
		return 0;
	}

	return -ENOKEY;
}

/*
 * Change the request_key authorisation key on the current process.
 */
static int keyctl_change_reqkey_auth(struct key *key)
{
	struct cred *new;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	key_put(new->request_key_auth);
	new->request_key_auth = key_get(key);

	return commit_creds(new);
}

/*
 * Instantiate a key with the specified payload and link the key into the
 * destination keyring if one is given.
 *
 * The caller must have the appropriate instantiation permit set for this to
 * work (see keyctl_assume_authority).  No other permissions are required.
 *
 * If successful, 0 will be returned.
 */
static long keyctl_instantiate_key_common(key_serial_t id,
					  struct iov_iter *from,
					  key_serial_t ringid)
{
	const struct cred *cred = current_cred();
	struct request_key_auth *rka;
	struct key *instkey, *dest_keyring;
	size_t plen = from ? iov_iter_count(from) : 0;
	void *payload;
	long ret;

	kenter("%d,,%zu,%d", id, plen, ringid);

	if (!plen)
		from = NULL;

	ret = -EINVAL;
	if (plen > 1024 * 1024 - 1)
		goto error;

	/* the appropriate instantiation authorisation key must have been
	 * assumed before calling this */
	ret = -EPERM;
	instkey = cred->request_key_auth;
	if (!instkey)
		goto error;

	rka = instkey->payload.data[0];
	if (rka->target_key->serial != id)
		goto error;

	/* pull the payload in if one was supplied */
	payload = NULL;

	if (from) {
		ret = -ENOMEM;
		payload = kvmalloc(plen, GFP_KERNEL);
		if (!payload)
			goto error;

		ret = -EFAULT;
		if (!copy_from_iter_full(payload, plen, from))
			goto error2;
	}

	/* find the destination keyring amongst those belonging to the
	 * requesting task */
	ret = get_instantiation_keyring(ringid, rka, &dest_keyring);