Total coverage: 452820 (23%)of 1995065
136 134 136 134 68 68 68 68 68 68 19 4 2 2 4 4 7 9 8 2 5 2 6 9 2 27 27 1 1 25 25 25 1 1 25 1 1 1 1 13 13 13 39 39 13 9 39 39 1 38 13 13 13 13 38 39 65 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/mount.c - kernfs mount implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> #include <linux/uuid.h> #include <linux/statfs.h> #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache __ro_after_init; struct kmem_cache *kernfs_iattrs_cache __ro_after_init; struct kernfs_global_locks *kernfs_locks __ro_after_init; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_options) return scops->show_options(sf, root); return 0; } static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) { struct kernfs_node *node = kernfs_dentry_node(dentry); struct kernfs_root *root = kernfs_root(node); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_path) return scops->show_path(sf, node, root); seq_dentry(sf, dentry, " \t\n\\"); return 0; } static int kernfs_statfs(struct dentry *dentry, struct kstatfs *buf) { simple_statfs(dentry, buf); buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); return 0; } const struct super_operations kernfs_sops = { .statfs = kernfs_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, }; static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct kernfs_node *kn = inode->i_private; if (*max_len < 2) { *max_len = 2; return FILEID_INVALID; } *max_len = 2; *(u64 *)fh = kn->id; return FILEID_KERNFS; } static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, bool get_parent) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *kn; struct inode *inode; u64 id; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_KERNFS: id = *(u64 *)fid; break; case FILEID_INO32_GEN: case FILEID_INO32_GEN_PARENT: /* * blk_log_action() exposes "LOW32,HIGH32" pair without * type and userland can call us with generic fid * constructed from them. Combine it back to ID. See * blk_log_action(). */ id = ((u64)fid->i32.gen << 32) | fid->i32.ino; break; default: return NULL; } kn = kernfs_find_and_get_node_by_id(info->root, id); if (!kn) return ERR_PTR(-ESTALE); if (get_parent) { struct kernfs_node *parent; parent = kernfs_get_parent(kn); kernfs_put(kn); kn = parent; if (!kn) return ERR_PTR(-ESTALE); } inode = kernfs_get_inode(sb, kn); kernfs_put(kn); return d_obtain_alias(inode); } static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false); } static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true); } static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); } static const struct export_operations kernfs_export_ops = { .encode_fh = kernfs_encode_fh, .fh_to_dentry = kernfs_fh_to_dentry, .fh_to_parent = kernfs_fh_to_parent, .get_parent = kernfs_get_parent_dentry, }; /** * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { if (sb->s_op == &kernfs_sops) return kernfs_info(sb)->root; return NULL; } /* * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. */ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, struct kernfs_node *parent) { if (child == parent) { pr_crit_once("BUG in find_next_ancestor: called with parent == child"); return NULL; } while (child->parent != parent) { if (!child->parent) return NULL; child = child->parent; } return child; } /** * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block * * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) { struct dentry *dentry; struct kernfs_node *knparent; BUG_ON(sb->s_op != &kernfs_sops); dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ if (!kn->parent) return dentry; knparent = find_next_ancestor(kn, NULL); if (WARN_ON(!knparent)) { dput(dentry); return ERR_PTR(-EINVAL); } do { struct dentry *dtmp; struct kernfs_node *kntmp; if (kn == knparent) return dentry; kntmp = find_next_ancestor(kn, knparent); if (WARN_ON(!kntmp)) { dput(dentry); return ERR_PTR(-EINVAL); } dtmp = lookup_positive_unlocked(kntmp->name, dentry, strlen(kntmp->name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; dentry = dtmp; } while (true); } static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *kf_root = kfc->root; struct inode *inode; struct dentry *root; info->sb = sb; /* Userspace would break if executables or devices appear on sysfs */ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) sb->s_export_op = &kernfs_export_ops; sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); inode = kernfs_get_inode(sb, info->root->kn); up_read(&kf_root->kernfs_rwsem); if (!inode) { pr_debug("kernfs: could not get root inode\n"); return -ENOMEM; } /* instantiate and link root dentry */ root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n", __func__); return -ENOMEM; } sb->s_root = root; sb->s_d_op = &kernfs_dops; return 0; } static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; kfc->ns_tag = NULL; return set_anon_super_fc(sb, fc); } /** * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); return info->ns; } /** * kernfs_get_tree - kernfs filesystem access/retrieval helper * @fc: The filesystem context. * * This is to be called from each kernfs user's fs_context->ops->get_tree() * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. * * Return: %0 on success, -errno on failure. */ int kernfs_get_tree(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->root = kfc->root; info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); fc->s_fs_info = info; sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = kfc->root; kfc->new_sb_created = true; error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); return error; } sb->s_flags |= SB_ACTIVE; uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); down_write(&root->kernfs_supers_rwsem); list_add(&info->node, &info->root->supers); up_write(&root->kernfs_supers_rwsem); } fc->root = dget(sb->s_root); return 0; } void kernfs_free_fs_context(struct fs_context *fc) { /* Note that we don't deal with kfc->ns_tag here. */ kfree(fc->s_fs_info); fc->s_fs_info = NULL; } /** * kernfs_kill_sb - kill_sb for kernfs * @sb: super_block being killed * * This can be used directly for file_system_type->kill_sb(). If a kernfs * user needs extra cleanup, it can implement its own kill_sb() and call * this function at the end. */ void kernfs_kill_sb(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = info->root; down_write(&root->kernfs_supers_rwsem); list_del(&info->node); up_write(&root->kernfs_supers_rwsem); /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. */ kill_anon_super(sb); kfree(info); } static void __init kernfs_mutex_init(void) { int count; for (count = 0; count < NR_KERNFS_LOCKS; count++) mutex_init(&kernfs_locks->open_file_mutex[count]); } static void __init kernfs_lock_init(void) { kernfs_locks = kmalloc(sizeof(struct kernfs_global_locks), GFP_KERNEL); WARN_ON(!kernfs_locks); kernfs_mutex_init(); } void __init kernfs_init(void) { kernfs_node_cache = kmem_cache_create("kernfs_node_cache", sizeof(struct kernfs_node), 0, SLAB_PANIC, NULL); /* Creates slab cache for kernfs inode attributes */ kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache", sizeof(struct kernfs_iattrs), 0, SLAB_PANIC, NULL); kernfs_lock_init(); }
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 9 9 9 9 9 9 3 9 9 9 9 3 9 9 9 9 8 8 8 8 8 8 8 8 8 8 8 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 9 9 6 2 2 2 2 2 2 2 9 9 20 19 4 20 20 18 20 20 7 7 3 7 7 7 6 6 6 6 20 19 19 19 19 10 4 6 10 10 10 1 11 11 11 11 2 9 9 9 2 9 11 11 12 12 12 11 11 19 19 20 20 20 19 18 5 5 6 3 6 3 6 16 15 15 15 15 2 2 2 9 13 13 13 12 1 1 1 1 1 1 1 4 4 4 4 4 4 4 4 3 3 3 3 3 3 1 3 3 3 3 3 3 3 7 20 18 20 16 6 12 12 20 20 20 4 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 /* * DRBG: Deterministic Random Bits Generator * Based on NIST Recommended DRBG from NIST SP800-90A with the following * properties: * * CTR DRBG with DF with AES-128, AES-192, AES-256 cores * * Hash DRBG with DF with SHA-1, SHA-256, SHA-384, SHA-512 cores * * HMAC DRBG with DF with SHA-1, SHA-256, SHA-384, SHA-512 cores * * with and without prediction resistance * * Copyright Stephan Mueller <smueller@chronox.de>, 2014 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * DRBG Usage * ========== * The SP 800-90A DRBG allows the user to specify a personalization string * for initialization as well as an additional information string for each * random number request. The following code fragments show how a caller * uses the kernel crypto API to use the full functionality of the DRBG. * * Usage without any additional data * --------------------------------- * struct crypto_rng *drng; * int err; * char data[DATALEN]; * * drng = crypto_alloc_rng(drng_name, 0, 0); * err = crypto_rng_get_bytes(drng, &data, DATALEN); * crypto_free_rng(drng); * * * Usage with personalization string during initialization * ------------------------------------------------------- * struct crypto_rng *drng; * int err; * char data[DATALEN]; * struct drbg_string pers; * char personalization[11] = "some-string"; * * drbg_string_fill(&pers, personalization, strlen(personalization)); * drng = crypto_alloc_rng(drng_name, 0, 0); * // The reset completely re-initializes the DRBG with the provided * // personalization string * err = crypto_rng_reset(drng, &personalization, strlen(personalization)); * err = crypto_rng_get_bytes(drng, &data, DATALEN); * crypto_free_rng(drng); * * * Usage with additional information string during random number request * --------------------------------------------------------------------- * struct crypto_rng *drng; * int err; * char data[DATALEN]; * char addtl_string[11] = "some-string"; * string drbg_string addtl; * * drbg_string_fill(&addtl, addtl_string, strlen(addtl_string)); * drng = crypto_alloc_rng(drng_name, 0, 0); * // The following call is a wrapper to crypto_rng_get_bytes() and returns * // the same error codes. * err = crypto_drbg_get_bytes_addtl(drng, &data, DATALEN, &addtl); * crypto_free_rng(drng); * * * Usage with personalization and additional information strings * ------------------------------------------------------------- * Just mix both scenarios above. */ #include <crypto/drbg.h> #include <crypto/internal/cipher.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string_choices.h> /*************************************************************** * Backend cipher definitions available to DRBG ***************************************************************/ /* * The order of the DRBG definitions here matter: every DRBG is registered * as stdrng. Each DRBG receives an increasing cra_priority values the later * they are defined in this array (see drbg_fill_array). * * HMAC DRBGs are favored over Hash DRBGs over CTR DRBGs, and the * HMAC-SHA512 / SHA256 / AES 256 over other ciphers. Thus, the * favored DRBGs are the latest entries in this array. */ static const struct drbg_core drbg_cores[] = { #ifdef CONFIG_CRYPTO_DRBG_CTR { .flags = DRBG_CTR | DRBG_STRENGTH128, .statelen = 32, /* 256 bits as defined in 10.2.1 */ .blocklen_bytes = 16, .cra_name = "ctr_aes128", .backend_cra_name = "aes", }, { .flags = DRBG_CTR | DRBG_STRENGTH192, .statelen = 40, /* 320 bits as defined in 10.2.1 */ .blocklen_bytes = 16, .cra_name = "ctr_aes192", .backend_cra_name = "aes", }, { .flags = DRBG_CTR | DRBG_STRENGTH256, .statelen = 48, /* 384 bits as defined in 10.2.1 */ .blocklen_bytes = 16, .cra_name = "ctr_aes256", .backend_cra_name = "aes", }, #endif /* CONFIG_CRYPTO_DRBG_CTR */ #ifdef CONFIG_CRYPTO_DRBG_HASH { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 111, /* 888 bits */ .blocklen_bytes = 48, .cra_name = "sha384", .backend_cra_name = "sha384", }, { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 111, /* 888 bits */ .blocklen_bytes = 64, .cra_name = "sha512", .backend_cra_name = "sha512", }, { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 55, /* 440 bits */ .blocklen_bytes = 32, .cra_name = "sha256", .backend_cra_name = "sha256", }, #endif /* CONFIG_CRYPTO_DRBG_HASH */ #ifdef CONFIG_CRYPTO_DRBG_HMAC { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 48, /* block length of cipher */ .blocklen_bytes = 48, .cra_name = "hmac_sha384", .backend_cra_name = "hmac(sha384)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 32, /* block length of cipher */ .blocklen_bytes = 32, .cra_name = "hmac_sha256", .backend_cra_name = "hmac(sha256)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 64, /* block length of cipher */ .blocklen_bytes = 64, .cra_name = "hmac_sha512", .backend_cra_name = "hmac(sha512)", }, #endif /* CONFIG_CRYPTO_DRBG_HMAC */ }; static int drbg_uninstantiate(struct drbg_state *drbg); /****************************************************************** * Generic helper functions ******************************************************************/ /* * Return strength of DRBG according to SP800-90A section 8.4 * * @flags DRBG flags reference * * Return: normalized strength in *bytes* value or 32 as default * to counter programming errors */ static inline unsigned short drbg_sec_strength(drbg_flag_t flags) { switch (flags & DRBG_STRENGTH_MASK) { case DRBG_STRENGTH128: return 16; case DRBG_STRENGTH192: return 24; case DRBG_STRENGTH256: return 32; default: return 32; } } /* * FIPS 140-2 continuous self test for the noise source * The test is performed on the noise source input data. Thus, the function * implicitly knows the size of the buffer to be equal to the security * strength. * * Note, this function disregards the nonce trailing the entropy data during * initial seeding. * * drbg->drbg_mutex must have been taken. * * @drbg DRBG handle * @entropy buffer of seed data to be checked * * return: * 0 on success * -EAGAIN on when the CTRNG is not yet primed * < 0 on error */ static int drbg_fips_continuous_test(struct drbg_state *drbg, const unsigned char *entropy) { unsigned short entropylen = drbg_sec_strength(drbg->core->flags); int ret = 0; if (!IS_ENABLED(CONFIG_CRYPTO_FIPS)) return 0; /* skip test if we test the overall system */ if (list_empty(&drbg->test_data.list)) return 0; /* only perform test in FIPS mode */ if (!fips_enabled) return 0; if (!drbg->fips_primed) { /* Priming of FIPS test */ memcpy(drbg->prev, entropy, entropylen); drbg->fips_primed = true; /* priming: another round is needed */ return -EAGAIN; } ret = memcmp(drbg->prev, entropy, entropylen); if (!ret) panic("DRBG continuous self test failed\n"); memcpy(drbg->prev, entropy, entropylen); /* the test shall pass when the two values are not equal */ return 0; } /* * Convert an integer into a byte representation of this integer. * The byte representation is big-endian * * @val value to be converted * @buf buffer holding the converted integer -- caller must ensure that * buffer size is at least 32 bit */ #if (defined(CONFIG_CRYPTO_DRBG_HASH) || defined(CONFIG_CRYPTO_DRBG_CTR)) static inline void drbg_cpu_to_be32(__u32 val, unsigned char *buf) { struct s { __be32 conv; }; struct s *conversion = (struct s *) buf; conversion->conv = cpu_to_be32(val); } #endif /* defined(CONFIG_CRYPTO_DRBG_HASH) || defined(CONFIG_CRYPTO_DRBG_CTR) */ /****************************************************************** * CTR DRBG callback functions ******************************************************************/ #ifdef CONFIG_CRYPTO_DRBG_CTR #define CRYPTO_DRBG_CTR_STRING "CTR " MODULE_ALIAS_CRYPTO("drbg_pr_ctr_aes256"); MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes256"); MODULE_ALIAS_CRYPTO("drbg_pr_ctr_aes192"); MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes192"); MODULE_ALIAS_CRYPTO("drbg_pr_ctr_aes128"); MODULE_ALIAS_CRYPTO("drbg_nopr_ctr_aes128"); static void drbg_kcapi_symsetkey(struct drbg_state *drbg, const unsigned char *key); static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, const struct drbg_string *in); static int drbg_init_sym_kernel(struct drbg_state *drbg); static int drbg_fini_sym_kernel(struct drbg_state *drbg); static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *inbuf, u32 inbuflen, u8 *outbuf, u32 outlen); #define DRBG_OUTSCRATCHLEN 256 /* BCC function for CTR DRBG as defined in 10.4.3 */ static int drbg_ctr_bcc(struct drbg_state *drbg, unsigned char *out, const unsigned char *key, struct list_head *in) { int ret = 0; struct drbg_string *curr = NULL; struct drbg_string data; short cnt = 0; drbg_string_fill(&data, out, drbg_blocklen(drbg)); /* 10.4.3 step 2 / 4 */ drbg_kcapi_symsetkey(drbg, key); list_for_each_entry(curr, in, list) { const unsigned char *pos = curr->buf; size_t len = curr->len; /* 10.4.3 step 4.1 */ while (len) { /* 10.4.3 step 4.2 */ if (drbg_blocklen(drbg) == cnt) { cnt = 0; ret = drbg_kcapi_sym(drbg, out, &data); if (ret) return ret; } out[cnt] ^= *pos; pos++; cnt++; len--; } } /* 10.4.3 step 4.2 for last block */ if (cnt) ret = drbg_kcapi_sym(drbg, out, &data); return ret; } /* * scratchpad usage: drbg_ctr_update is interlinked with drbg_ctr_df * (and drbg_ctr_bcc, but this function does not need any temporary buffers), * the scratchpad is used as follows: * drbg_ctr_update: * temp * start: drbg->scratchpad * length: drbg_statelen(drbg) + drbg_blocklen(drbg) * note: the cipher writing into this variable works * blocklen-wise. Now, when the statelen is not a multiple * of blocklen, the generateion loop below "spills over" * by at most blocklen. Thus, we need to give sufficient * memory. * df_data * start: drbg->scratchpad + * drbg_statelen(drbg) + drbg_blocklen(drbg) * length: drbg_statelen(drbg) * * drbg_ctr_df: * pad * start: df_data + drbg_statelen(drbg) * length: drbg_blocklen(drbg) * iv * start: pad + drbg_blocklen(drbg) * length: drbg_blocklen(drbg) * temp * start: iv + drbg_blocklen(drbg) * length: drbg_satelen(drbg) + drbg_blocklen(drbg) * note: temp is the buffer that the BCC function operates * on. BCC operates blockwise. drbg_statelen(drbg) * is sufficient when the DRBG state length is a multiple * of the block size. For AES192 (and maybe other ciphers) * this is not correct and the length for temp is * insufficient (yes, that also means for such ciphers, * the final output of all BCC rounds are truncated). * Therefore, add drbg_blocklen(drbg) to cover all * possibilities. */ /* Derivation Function for CTR DRBG as defined in 10.4.2 */ static int drbg_ctr_df(struct drbg_state *drbg, unsigned char *df_data, size_t bytes_to_return, struct list_head *seedlist) { int ret = -EFAULT; unsigned char L_N[8]; /* S3 is input */ struct drbg_string S1, S2, S4, cipherin; LIST_HEAD(bcc_list); unsigned char *pad = df_data + drbg_statelen(drbg); unsigned char *iv = pad + drbg_blocklen(drbg); unsigned char *temp = iv + drbg_blocklen(drbg); size_t padlen = 0; unsigned int templen = 0; /* 10.4.2 step 7 */ unsigned int i = 0; /* 10.4.2 step 8 */ const unsigned char *K = (unsigned char *) "\x00\x01\x02\x03\x04\x05\x06\x07" "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" "\x10\x11\x12\x13\x14\x15\x16\x17" "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"; unsigned char *X; size_t generated_len = 0; size_t inputlen = 0; struct drbg_string *seed = NULL; memset(pad, 0, drbg_blocklen(drbg)); memset(iv, 0, drbg_blocklen(drbg)); /* 10.4.2 step 1 is implicit as we work byte-wise */ /* 10.4.2 step 2 */ if ((512/8) < bytes_to_return) return -EINVAL; /* 10.4.2 step 2 -- calculate the entire length of all input data */ list_for_each_entry(seed, seedlist, list) inputlen += seed->len; drbg_cpu_to_be32(inputlen, &L_N[0]); /* 10.4.2 step 3 */ drbg_cpu_to_be32(bytes_to_return, &L_N[4]); /* 10.4.2 step 5: length is L_N, input_string, one byte, padding */ padlen = (inputlen + sizeof(L_N) + 1) % (drbg_blocklen(drbg)); /* wrap the padlen appropriately */ if (padlen) padlen = drbg_blocklen(drbg) - padlen; /* * pad / padlen contains the 0x80 byte and the following zero bytes. * As the calculated padlen value only covers the number of zero * bytes, this value has to be incremented by one for the 0x80 byte. */ padlen++; pad[0] = 0x80; /* 10.4.2 step 4 -- first fill the linked list and then order it */ drbg_string_fill(&S1, iv, drbg_blocklen(drbg)); list_add_tail(&S1.list, &bcc_list); drbg_string_fill(&S2, L_N, sizeof(L_N)); list_add_tail(&S2.list, &bcc_list); list_splice_tail(seedlist, &bcc_list); drbg_string_fill(&S4, pad, padlen); list_add_tail(&S4.list, &bcc_list); /* 10.4.2 step 9 */ while (templen < (drbg_keylen(drbg) + (drbg_blocklen(drbg)))) { /* * 10.4.2 step 9.1 - the padding is implicit as the buffer * holds zeros after allocation -- even the increment of i * is irrelevant as the increment remains within length of i */ drbg_cpu_to_be32(i, iv); /* 10.4.2 step 9.2 -- BCC and concatenation with temp */ ret = drbg_ctr_bcc(drbg, temp + templen, K, &bcc_list); if (ret) goto out; /* 10.4.2 step 9.3 */ i++; templen += drbg_blocklen(drbg); } /* 10.4.2 step 11 */ X = temp + (drbg_keylen(drbg)); drbg_string_fill(&cipherin, X, drbg_blocklen(drbg)); /* 10.4.2 step 12: overwriting of outval is implemented in next step */ /* 10.4.2 step 13 */ drbg_kcapi_symsetkey(drbg, temp); while (generated_len < bytes_to_return) { short blocklen = 0; /* * 10.4.2 step 13.1: the truncation of the key length is * implicit as the key is only drbg_blocklen in size based on * the implementation of the cipher function callback */ ret = drbg_kcapi_sym(drbg, X, &cipherin); if (ret) goto out; blocklen = (drbg_blocklen(drbg) < (bytes_to_return - generated_len)) ? drbg_blocklen(drbg) : (bytes_to_return - generated_len); /* 10.4.2 step 13.2 and 14 */ memcpy(df_data + generated_len, X, blocklen); generated_len += blocklen; } ret = 0; out: memset(iv, 0, drbg_blocklen(drbg)); memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg)); memset(pad, 0, drbg_blocklen(drbg)); return ret; } /* * update function of CTR DRBG as defined in 10.2.1.2 * * The reseed variable has an enhanced meaning compared to the update * functions of the other DRBGs as follows: * 0 => initial seed from initialization * 1 => reseed via drbg_seed * 2 => first invocation from drbg_ctr_update when addtl is present. In * this case, the df_data scratchpad is not deleted so that it is * available for another calls to prevent calling the DF function * again. * 3 => second invocation from drbg_ctr_update. When the update function * was called with addtl, the df_data memory already contains the * DFed addtl information and we do not need to call DF again. */ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, int reseed) { int ret = -EFAULT; /* 10.2.1.2 step 1 */ unsigned char *temp = drbg->scratchpad; unsigned char *df_data = drbg->scratchpad + drbg_statelen(drbg) + drbg_blocklen(drbg); if (3 > reseed) memset(df_data, 0, drbg_statelen(drbg)); if (!reseed) { /* * The DRBG uses the CTR mode of the underlying AES cipher. The * CTR mode increments the counter value after the AES operation * but SP800-90A requires that the counter is incremented before * the AES operation. Hence, we increment it at the time we set * it by one. */ crypto_inc(drbg->V, drbg_blocklen(drbg)); ret = crypto_skcipher_setkey(drbg->ctr_handle, drbg->C, drbg_keylen(drbg)); if (ret) goto out; } /* 10.2.1.3.2 step 2 and 10.2.1.4.2 step 2 */ if (seed) { ret = drbg_ctr_df(drbg, df_data, drbg_statelen(drbg), seed); if (ret) goto out; } ret = drbg_kcapi_sym_ctr(drbg, df_data, drbg_statelen(drbg), temp, drbg_statelen(drbg)); if (ret) return ret; /* 10.2.1.2 step 5 */ ret = crypto_skcipher_setkey(drbg->ctr_handle, temp, drbg_keylen(drbg)); if (ret) goto out; /* 10.2.1.2 step 6 */ memcpy(drbg->V, temp + drbg_keylen(drbg), drbg_blocklen(drbg)); /* See above: increment counter by one to compensate timing of CTR op */ crypto_inc(drbg->V, drbg_blocklen(drbg)); ret = 0; out: memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg)); if (2 != reseed) memset(df_data, 0, drbg_statelen(drbg)); return ret; } /* * scratchpad use: drbg_ctr_update is called independently from * drbg_ctr_extract_bytes. Therefore, the scratchpad is reused */ /* Generate function of CTR DRBG as defined in 10.2.1.5.2 */ static int drbg_ctr_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl) { int ret; int len = min_t(int, buflen, INT_MAX); /* 10.2.1.5.2 step 2 */ if (addtl && !list_empty(addtl)) { ret = drbg_ctr_update(drbg, addtl, 2); if (ret) return 0; } /* 10.2.1.5.2 step 4.1 */ ret = drbg_kcapi_sym_ctr(drbg, NULL, 0, buf, len); if (ret) return ret; /* 10.2.1.5.2 step 6 */ ret = drbg_ctr_update(drbg, NULL, 3); if (ret) len = ret; return len; } static const struct drbg_state_ops drbg_ctr_ops = { .update = drbg_ctr_update, .generate = drbg_ctr_generate, .crypto_init = drbg_init_sym_kernel, .crypto_fini = drbg_fini_sym_kernel, }; #endif /* CONFIG_CRYPTO_DRBG_CTR */ /****************************************************************** * HMAC DRBG callback functions ******************************************************************/ #if defined(CONFIG_CRYPTO_DRBG_HASH) || defined(CONFIG_CRYPTO_DRBG_HMAC) static int drbg_kcapi_hash(struct drbg_state *drbg, unsigned char *outval, const struct list_head *in); static void drbg_kcapi_hmacsetkey(struct drbg_state *drbg, const unsigned char *key); static int drbg_init_hash_kernel(struct drbg_state *drbg); static int drbg_fini_hash_kernel(struct drbg_state *drbg); #endif /* (CONFIG_CRYPTO_DRBG_HASH || CONFIG_CRYPTO_DRBG_HMAC) */ #ifdef CONFIG_CRYPTO_DRBG_HMAC #define CRYPTO_DRBG_HMAC_STRING "HMAC " MODULE_ALIAS_CRYPTO("drbg_pr_hmac_sha512"); MODULE_ALIAS_CRYPTO("drbg_nopr_hmac_sha512"); MODULE_ALIAS_CRYPTO("drbg_pr_hmac_sha384"); MODULE_ALIAS_CRYPTO("drbg_nopr_hmac_sha384"); MODULE_ALIAS_CRYPTO("drbg_pr_hmac_sha256"); MODULE_ALIAS_CRYPTO("drbg_nopr_hmac_sha256"); /* update function of HMAC DRBG as defined in 10.1.2.2 */ static int drbg_hmac_update(struct drbg_state *drbg, struct list_head *seed, int reseed) { int ret = -EFAULT; int i = 0; struct drbg_string seed1, seed2, vdata; LIST_HEAD(seedlist); LIST_HEAD(vdatalist); if (!reseed) { /* 10.1.2.3 step 2 -- memset(0) of C is implicit with kzalloc */ memset(drbg->V, 1, drbg_statelen(drbg)); drbg_kcapi_hmacsetkey(drbg, drbg->C); } drbg_string_fill(&seed1, drbg->V, drbg_statelen(drbg)); list_add_tail(&seed1.list, &seedlist); /* buffer of seed2 will be filled in for loop below with one byte */ drbg_string_fill(&seed2, NULL, 1); list_add_tail(&seed2.list, &seedlist); /* input data of seed is allowed to be NULL at this point */ if (seed) list_splice_tail(seed, &seedlist); drbg_string_fill(&vdata, drbg->V, drbg_statelen(drbg)); list_add_tail(&vdata.list, &vdatalist); for (i = 2; 0 < i; i--) { /* first round uses 0x0, second 0x1 */ unsigned char prefix = DRBG_PREFIX0; if (1 == i) prefix = DRBG_PREFIX1; /* 10.1.2.2 step 1 and 4 -- concatenation and HMAC for key */ seed2.buf = &prefix; ret = drbg_kcapi_hash(drbg, drbg->C, &seedlist); if (ret) return ret; drbg_kcapi_hmacsetkey(drbg, drbg->C); /* 10.1.2.2 step 2 and 5 -- HMAC for V */ ret = drbg_kcapi_hash(drbg, drbg->V, &vdatalist); if (ret) return ret; /* 10.1.2.2 step 3 */ if (!seed) return ret; } return 0; } /* generate function of HMAC DRBG as defined in 10.1.2.5 */ static int drbg_hmac_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl) { int len = 0; int ret = 0; struct drbg_string data; LIST_HEAD(datalist); /* 10.1.2.5 step 2 */ if (addtl && !list_empty(addtl)) { ret = drbg_hmac_update(drbg, addtl, 1); if (ret) return ret; } drbg_string_fill(&data, drbg->V, drbg_statelen(drbg)); list_add_tail(&data.list, &datalist); while (len < buflen) { unsigned int outlen = 0; /* 10.1.2.5 step 4.1 */ ret = drbg_kcapi_hash(drbg, drbg->V, &datalist); if (ret) return ret; outlen = (drbg_blocklen(drbg) < (buflen - len)) ? drbg_blocklen(drbg) : (buflen - len); /* 10.1.2.5 step 4.2 */ memcpy(buf + len, drbg->V, outlen); len += outlen; } /* 10.1.2.5 step 6 */ if (addtl && !list_empty(addtl)) ret = drbg_hmac_update(drbg, addtl, 1); else ret = drbg_hmac_update(drbg, NULL, 1); if (ret) return ret; return len; } static const struct drbg_state_ops drbg_hmac_ops = { .update = drbg_hmac_update, .generate = drbg_hmac_generate, .crypto_init = drbg_init_hash_kernel, .crypto_fini = drbg_fini_hash_kernel, }; #endif /* CONFIG_CRYPTO_DRBG_HMAC */ /****************************************************************** * Hash DRBG callback functions ******************************************************************/ #ifdef CONFIG_CRYPTO_DRBG_HASH #define CRYPTO_DRBG_HASH_STRING "HASH " MODULE_ALIAS_CRYPTO("drbg_pr_sha512"); MODULE_ALIAS_CRYPTO("drbg_nopr_sha512"); MODULE_ALIAS_CRYPTO("drbg_pr_sha384"); MODULE_ALIAS_CRYPTO("drbg_nopr_sha384"); MODULE_ALIAS_CRYPTO("drbg_pr_sha256"); MODULE_ALIAS_CRYPTO("drbg_nopr_sha256"); /* * Increment buffer * * @dst buffer to increment * @add value to add */ static inline void drbg_add_buf(unsigned char *dst, size_t dstlen, const unsigned char *add, size_t addlen) { /* implied: dstlen > addlen */ unsigned char *dstptr; const unsigned char *addptr; unsigned int remainder = 0; size_t len = addlen; dstptr = dst + (dstlen-1); addptr = add + (addlen-1); while (len) { remainder += *dstptr + *addptr; *dstptr = remainder & 0xff; remainder >>= 8; len--; dstptr--; addptr--; } len = dstlen - addlen; while (len && remainder > 0) { remainder = *dstptr + 1; *dstptr = remainder & 0xff; remainder >>= 8; len--; dstptr--; } } /* * scratchpad usage: as drbg_hash_update and drbg_hash_df are used * interlinked, the scratchpad is used as follows: * drbg_hash_update * start: drbg->scratchpad * length: drbg_statelen(drbg) * drbg_hash_df: * start: drbg->scratchpad + drbg_statelen(drbg) * length: drbg_blocklen(drbg) * * drbg_hash_process_addtl uses the scratchpad, but fully completes * before either of the functions mentioned before are invoked. Therefore, * drbg_hash_process_addtl does not need to be specifically considered. */ /* Derivation Function for Hash DRBG as defined in 10.4.1 */ static int drbg_hash_df(struct drbg_state *drbg, unsigned char *outval, size_t outlen, struct list_head *entropylist) { int ret = 0; size_t len = 0; unsigned char input[5]; unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg); struct drbg_string data; /* 10.4.1 step 3 */ input[0] = 1; drbg_cpu_to_be32((outlen * 8), &input[1]); /* 10.4.1 step 4.1 -- concatenation of data for input into hash */ drbg_string_fill(&data, input, 5); list_add(&data.list, entropylist); /* 10.4.1 step 4 */ while (len < outlen) { short blocklen = 0; /* 10.4.1 step 4.1 */ ret = drbg_kcapi_hash(drbg, tmp, entropylist); if (ret) goto out; /* 10.4.1 step 4.2 */ input[0]++; blocklen = (drbg_blocklen(drbg) < (outlen - len)) ? drbg_blocklen(drbg) : (outlen - len); memcpy(outval + len, tmp, blocklen); len += blocklen; } out: memset(tmp, 0, drbg_blocklen(drbg)); return ret; } /* update function for Hash DRBG as defined in 10.1.1.2 / 10.1.1.3 */ static int drbg_hash_update(struct drbg_state *drbg, struct list_head *seed, int reseed) { int ret = 0; struct drbg_string data1, data2; LIST_HEAD(datalist); LIST_HEAD(datalist2); unsigned char *V = drbg->scratchpad; unsigned char prefix = DRBG_PREFIX1; if (!seed) return -EINVAL; if (reseed) { /* 10.1.1.3 step 1 */ memcpy(V, drbg->V, drbg_statelen(drbg)); drbg_string_fill(&data1, &prefix, 1); list_add_tail(&data1.list, &datalist); drbg_string_fill(&data2, V, drbg_statelen(drbg)); list_add_tail(&data2.list, &datalist); } list_splice_tail(seed, &datalist); /* 10.1.1.2 / 10.1.1.3 step 2 and 3 */ ret = drbg_hash_df(drbg, drbg->V, drbg_statelen(drbg), &datalist); if (ret) goto out; /* 10.1.1.2 / 10.1.1.3 step 4 */ prefix = DRBG_PREFIX0; drbg_string_fill(&data1, &prefix, 1); list_add_tail(&data1.list, &datalist2); drbg_string_fill(&data2, drbg->V, drbg_statelen(drbg)); list_add_tail(&data2.list, &datalist2); /* 10.1.1.2 / 10.1.1.3 step 4 */ ret = drbg_hash_df(drbg, drbg->C, drbg_statelen(drbg), &datalist2); out: memset(drbg->scratchpad, 0, drbg_statelen(drbg)); return ret; } /* processing of additional information string for Hash DRBG */ static int drbg_hash_process_addtl(struct drbg_state *drbg, struct list_head *addtl) { int ret = 0; struct drbg_string data1, data2; LIST_HEAD(datalist); unsigned char prefix = DRBG_PREFIX2; /* 10.1.1.4 step 2 */ if (!addtl || list_empty(addtl)) return 0; /* 10.1.1.4 step 2a */ drbg_string_fill(&data1, &prefix, 1); drbg_string_fill(&data2, drbg->V, drbg_statelen(drbg)); list_add_tail(&data1.list, &datalist); list_add_tail(&data2.list, &datalist); list_splice_tail(addtl, &datalist); ret = drbg_kcapi_hash(drbg, drbg->scratchpad, &datalist); if (ret) goto out; /* 10.1.1.4 step 2b */ drbg_add_buf(drbg->V, drbg_statelen(drbg), drbg->scratchpad, drbg_blocklen(drbg)); out: memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); return ret; } /* Hashgen defined in 10.1.1.4 */ static int drbg_hash_hashgen(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen) { int len = 0; int ret = 0; unsigned char *src = drbg->scratchpad; unsigned char *dst = drbg->scratchpad + drbg_statelen(drbg); struct drbg_string data; LIST_HEAD(datalist); /* 10.1.1.4 step hashgen 2 */ memcpy(src, drbg->V, drbg_statelen(drbg)); drbg_string_fill(&data, src, drbg_statelen(drbg)); list_add_tail(&data.list, &datalist); while (len < buflen) { unsigned int outlen = 0; /* 10.1.1.4 step hashgen 4.1 */ ret = drbg_kcapi_hash(drbg, dst, &datalist); if (ret) { len = ret; goto out; } outlen = (drbg_blocklen(drbg) < (buflen - len)) ? drbg_blocklen(drbg) : (buflen - len); /* 10.1.1.4 step hashgen 4.2 */ memcpy(buf + len, dst, outlen); len += outlen; /* 10.1.1.4 hashgen step 4.3 */ if (len < buflen) crypto_inc(src, drbg_statelen(drbg)); } out: memset(drbg->scratchpad, 0, (drbg_statelen(drbg) + drbg_blocklen(drbg))); return len; } /* generate function for Hash DRBG as defined in 10.1.1.4 */ static int drbg_hash_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl) { int len = 0; int ret = 0; union { unsigned char req[8]; __be64 req_int; } u; unsigned char prefix = DRBG_PREFIX3; struct drbg_string data1, data2; LIST_HEAD(datalist); /* 10.1.1.4 step 2 */ ret = drbg_hash_process_addtl(drbg, addtl); if (ret) return ret; /* 10.1.1.4 step 3 */ len = drbg_hash_hashgen(drbg, buf, buflen); /* this is the value H as documented in 10.1.1.4 */ /* 10.1.1.4 step 4 */ drbg_string_fill(&data1, &prefix, 1); list_add_tail(&data1.list, &datalist); drbg_string_fill(&data2, drbg->V, drbg_statelen(drbg)); list_add_tail(&data2.list, &datalist); ret = drbg_kcapi_hash(drbg, drbg->scratchpad, &datalist); if (ret) { len = ret; goto out; } /* 10.1.1.4 step 5 */ drbg_add_buf(drbg->V, drbg_statelen(drbg), drbg->scratchpad, drbg_blocklen(drbg)); drbg_add_buf(drbg->V, drbg_statelen(drbg), drbg->C, drbg_statelen(drbg)); u.req_int = cpu_to_be64(drbg->reseed_ctr); drbg_add_buf(drbg->V, drbg_statelen(drbg), u.req, 8); out: memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); return len; } /* * scratchpad usage: as update and generate are used isolated, both * can use the scratchpad */ static const struct drbg_state_ops drbg_hash_ops = { .update = drbg_hash_update, .generate = drbg_hash_generate, .crypto_init = drbg_init_hash_kernel, .crypto_fini = drbg_fini_hash_kernel, }; #endif /* CONFIG_CRYPTO_DRBG_HASH */ /****************************************************************** * Functions common for DRBG implementations ******************************************************************/ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, int reseed, enum drbg_seed_state new_seed_state) { int ret = drbg->d_ops->update(drbg, seed, reseed); if (ret) return ret; drbg->seeded = new_seed_state; drbg->last_seed_time = jiffies; /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; switch (drbg->seeded) { case DRBG_SEED_STATE_UNSEEDED: /* Impossible, but handle it to silence compiler warnings. */ fallthrough; case DRBG_SEED_STATE_PARTIAL: /* * Require frequent reseeds until the seed source is * fully initialized. */ drbg->reseed_threshold = 50; break; case DRBG_SEED_STATE_FULL: /* * Seed source has become fully initialized, frequent * reseeds no longer required. */ drbg->reseed_threshold = drbg_max_requests(drbg); break; } return ret; } static inline int drbg_get_random_bytes(struct drbg_state *drbg, unsigned char *entropy, unsigned int entropylen) { int ret; do { get_random_bytes(entropy, entropylen); ret = drbg_fips_continuous_test(drbg, entropy); if (ret && ret != -EAGAIN) return ret; } while (ret); return 0; } static int drbg_seed_from_random(struct drbg_state *drbg) { struct drbg_string data; LIST_HEAD(seedlist); unsigned int entropylen = drbg_sec_strength(drbg->core->flags); unsigned char entropy[32]; int ret; BUG_ON(!entropylen); BUG_ON(entropylen > sizeof(entropy)); drbg_string_fill(&data, entropy, entropylen); list_add_tail(&data.list, &seedlist); ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) goto out; ret = __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); out: memzero_explicit(entropy, entropylen); return ret; } static bool drbg_nopr_reseed_interval_elapsed(struct drbg_state *drbg) { unsigned long next_reseed; /* Don't ever reseed from get_random_bytes() in test mode. */ if (list_empty(&drbg->test_data.list)) return false; /* * Obtain fresh entropy for the nopr DRBGs after 300s have * elapsed in order to still achieve sort of partial * prediction resistance over the time domain at least. Note * that the period of 300s has been chosen to match the * CRNG_RESEED_INTERVAL of the get_random_bytes()' chacha * rngs. */ next_reseed = drbg->last_seed_time + 300 * HZ; return time_after(jiffies, next_reseed); } /* * Seeding or reseeding of the DRBG * * @drbg: DRBG state struct * @pers: personalization / additional information buffer * @reseed: 0 for initial seed process, 1 for reseeding * * return: * 0 on success * error value otherwise */ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, bool reseed) { int ret; unsigned char entropy[((32 + 16) * 2)]; unsigned int entropylen = drbg_sec_strength(drbg->core->flags); struct drbg_string data1; LIST_HEAD(seedlist); enum drbg_seed_state new_seed_state = DRBG_SEED_STATE_FULL; /* 9.1 / 9.2 / 9.3.1 step 3 */ if (pers && pers->len > (drbg_max_addtl(drbg))) { pr_devel("DRBG: personalization string too long %zu\n", pers->len); return -EINVAL; } if (list_empty(&drbg->test_data.list)) { drbg_string_fill(&data1, drbg->test_data.buf, drbg->test_data.len); pr_devel("DRBG: using test entropy\n"); } else { /* * Gather entropy equal to the security strength of the DRBG. * With a derivation function, a nonce is required in addition * to the entropy. A nonce must be at least 1/2 of the security * strength of the DRBG in size. Thus, entropy + nonce is 3/2 * of the strength. The consideration of a nonce is only * applicable during initial seeding. */ BUG_ON(!entropylen); if (!reseed) entropylen = ((entropylen + 1) / 2) * 3; BUG_ON((entropylen * 2) > sizeof(entropy)); /* Get seed from in-kernel /dev/urandom */ if (!rng_is_initialized()) new_seed_state = DRBG_SEED_STATE_PARTIAL; ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) goto out; if (!drbg->jent) { drbg_string_fill(&data1, entropy, entropylen); pr_devel("DRBG: (re)seeding with %u bytes of entropy\n", entropylen); } else { /* * Get seed from Jitter RNG, failures are * fatal only in FIPS mode. */ ret = crypto_rng_get_bytes(drbg->jent, entropy + entropylen, entropylen); if (fips_enabled && ret) { pr_devel("DRBG: jent failed with %d\n", ret); /* * Do not treat the transient failure of the * Jitter RNG as an error that needs to be * reported. The combined number of the * maximum reseed threshold times the maximum * number of Jitter RNG transient errors is * less than the reseed threshold required by * SP800-90A allowing us to treat the * transient errors as such. * * However, we mandate that at least the first * seeding operation must succeed with the * Jitter RNG. */ if (!reseed || ret != -EAGAIN) goto out; } drbg_string_fill(&data1, entropy, entropylen * 2); pr_devel("DRBG: (re)seeding with %u bytes of entropy\n", entropylen * 2); } } list_add_tail(&data1.list, &seedlist); /* * concatenation of entropy with personalization str / addtl input) * the variable pers is directly handed in by the caller, so check its * contents whether it is appropriate */ if (pers && pers->buf && 0 < pers->len) { list_add_tail(&pers->list, &seedlist); pr_devel("DRBG: using personalization string\n"); } if (!reseed) { memset(drbg->V, 0, drbg_statelen(drbg)); memset(drbg->C, 0, drbg_statelen(drbg)); } ret = __drbg_seed(drbg, &seedlist, reseed, new_seed_state); out: memzero_explicit(entropy, entropylen * 2); return ret; } /* Free all substructures in a DRBG state without the DRBG state structure */ static inline void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; kfree_sensitive(drbg->Vbuf); drbg->Vbuf = NULL; drbg->V = NULL; kfree_sensitive(drbg->Cbuf); drbg->Cbuf = NULL; drbg->C = NULL; kfree_sensitive(drbg->scratchpadbuf); drbg->scratchpadbuf = NULL; drbg->reseed_ctr = 0; drbg->d_ops = NULL; drbg->core = NULL; if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) { kfree_sensitive(drbg->prev); drbg->prev = NULL; drbg->fips_primed = false; } } /* * Allocate all sub-structures for a DRBG state. * The DRBG state structure must already be allocated. */ static inline int drbg_alloc_state(struct drbg_state *drbg) { int ret = -ENOMEM; unsigned int sb_size = 0; switch (drbg->core->flags & DRBG_TYPE_MASK) { #ifdef CONFIG_CRYPTO_DRBG_HMAC case DRBG_HMAC: drbg->d_ops = &drbg_hmac_ops; break; #endif /* CONFIG_CRYPTO_DRBG_HMAC */ #ifdef CONFIG_CRYPTO_DRBG_HASH case DRBG_HASH: drbg->d_ops = &drbg_hash_ops; break; #endif /* CONFIG_CRYPTO_DRBG_HASH */ #ifdef CONFIG_CRYPTO_DRBG_CTR case DRBG_CTR: drbg->d_ops = &drbg_ctr_ops; break; #endif /* CONFIG_CRYPTO_DRBG_CTR */ default: ret = -EOPNOTSUPP; goto err; } ret = drbg->d_ops->crypto_init(drbg); if (ret < 0) goto err; drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); if (!drbg->Vbuf) { ret = -ENOMEM; goto fini; } drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1); drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); if (!drbg->Cbuf) { ret = -ENOMEM; goto fini; } drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1); /* scratchpad is only generated for CTR and Hash */ if (drbg->core->flags & DRBG_HMAC) sb_size = 0; else if (drbg->core->flags & DRBG_CTR) sb_size = drbg_statelen(drbg) + drbg_blocklen(drbg) + /* temp */ drbg_statelen(drbg) + /* df_data */ drbg_blocklen(drbg) + /* pad */ drbg_blocklen(drbg) + /* iv */ drbg_statelen(drbg) + drbg_blocklen(drbg); /* temp */ else sb_size = drbg_statelen(drbg) + drbg_blocklen(drbg); if (0 < sb_size) { drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL); if (!drbg->scratchpadbuf) { ret = -ENOMEM; goto fini; } drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1); } if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) { drbg->prev = kzalloc(drbg_sec_strength(drbg->core->flags), GFP_KERNEL); if (!drbg->prev) { ret = -ENOMEM; goto fini; } drbg->fips_primed = false; } return 0; fini: drbg->d_ops->crypto_fini(drbg); err: drbg_dealloc_state(drbg); return ret; } /************************************************************************* * DRBG interface functions *************************************************************************/ /* * DRBG generate function as required by SP800-90A - this function * generates random numbers * * @drbg DRBG state handle * @buf Buffer where to store the random numbers -- the buffer must already * be pre-allocated by caller * @buflen Length of output buffer - this value defines the number of random * bytes pulled from DRBG * @addtl Additional input that is mixed into state, may be NULL -- note * the entropy is pulled by the DRBG internally unconditionally * as defined in SP800-90A. The additional input is mixed into * the state in addition to the pulled entropy. * * return: 0 when all bytes are generated; < 0 in case of an error */ static int drbg_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct drbg_string *addtl) { int len = 0; LIST_HEAD(addtllist); if (!drbg->core) { pr_devel("DRBG: not yet seeded\n"); return -EINVAL; } if (0 == buflen || !buf) { pr_devel("DRBG: no output buffer provided\n"); return -EINVAL; } if (addtl && NULL == addtl->buf && 0 < addtl->len) { pr_devel("DRBG: wrong format of additional information\n"); return -EINVAL; } /* 9.3.1 step 2 */ len = -EINVAL; if (buflen > (drbg_max_request_bytes(drbg))) { pr_devel("DRBG: requested random numbers too large %u\n", buflen); goto err; } /* 9.3.1 step 3 is implicit with the chosen DRBG */ /* 9.3.1 step 4 */ if (addtl && addtl->len > (drbg_max_addtl(drbg))) { pr_devel("DRBG: additional information string too long %zu\n", addtl->len); goto err; } /* 9.3.1 step 5 is implicit with the chosen DRBG */ /* * 9.3.1 step 6 and 9 supplemented by 9.3.2 step c is implemented * here. The spec is a bit convoluted here, we make it simpler. */ if (drbg->reseed_threshold < drbg->reseed_ctr) drbg->seeded = DRBG_SEED_STATE_UNSEEDED; if (drbg->pr || drbg->seeded == DRBG_SEED_STATE_UNSEEDED) { pr_devel("DRBG: reseeding before generation (prediction " "resistance: %s, state %s)\n", str_true_false(drbg->pr), (drbg->seeded == DRBG_SEED_STATE_FULL ? "seeded" : "unseeded")); /* 9.3.1 steps 7.1 through 7.3 */ len = drbg_seed(drbg, addtl, true); if (len) goto err; /* 9.3.1 step 7.4 */ addtl = NULL; } else if (rng_is_initialized() && (drbg->seeded == DRBG_SEED_STATE_PARTIAL || drbg_nopr_reseed_interval_elapsed(drbg))) { len = drbg_seed_from_random(drbg); if (len) goto err; } if (addtl && 0 < addtl->len) list_add_tail(&addtl->list, &addtllist); /* 9.3.1 step 8 and 10 */ len = drbg->d_ops->generate(drbg, buf, buflen, &addtllist); /* 10.1.1.4 step 6, 10.1.2.5 step 7, 10.2.1.5.2 step 7 */ drbg->reseed_ctr++; if (0 >= len) goto err; /* * Section 11.3.3 requires to re-perform self tests after some * generated random numbers. The chosen value after which self * test is performed is arbitrary, but it should be reasonable. * However, we do not perform the self tests because of the following * reasons: it is mathematically impossible that the initial self tests * were successfully and the following are not. If the initial would * pass and the following would not, the kernel integrity is violated. * In this case, the entire kernel operation is questionable and it * is unlikely that the integrity violation only affects the * correct operation of the DRBG. * * Albeit the following code is commented out, it is provided in * case somebody has a need to implement the test of 11.3.3. */ #if 0 if (drbg->reseed_ctr && !(drbg->reseed_ctr % 4096)) { int err = 0; pr_devel("DRBG: start to perform self test\n"); if (drbg->core->flags & DRBG_HMAC) err = alg_test("drbg_pr_hmac_sha512", "drbg_pr_hmac_sha512", 0, 0); else if (drbg->core->flags & DRBG_CTR) err = alg_test("drbg_pr_ctr_aes256", "drbg_pr_ctr_aes256", 0, 0); else err = alg_test("drbg_pr_sha256", "drbg_pr_sha256", 0, 0); if (err) { pr_err("DRBG: periodical self test failed\n"); /* * uninstantiate implies that from now on, only errors * are returned when reusing this DRBG cipher handle */ drbg_uninstantiate(drbg); return 0; } else { pr_devel("DRBG: self test successful\n"); } } #endif /* * All operations were successful, return 0 as mandated by * the kernel crypto API interface. */ len = 0; err: return len; } /* * Wrapper around drbg_generate which can pull arbitrary long strings * from the DRBG without hitting the maximum request limitation. * * Parameters: see drbg_generate * Return codes: see drbg_generate -- if one drbg_generate request fails, * the entire drbg_generate_long request fails */ static int drbg_generate_long(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct drbg_string *addtl) { unsigned int len = 0; unsigned int slice = 0; do { int err = 0; unsigned int chunk = 0; slice = ((buflen - len) / drbg_max_request_bytes(drbg)); chunk = slice ? drbg_max_request_bytes(drbg) : (buflen - len); mutex_lock(&drbg->drbg_mutex); err = drbg_generate(drbg, buf + len, chunk, addtl); mutex_unlock(&drbg->drbg_mutex); if (0 > err) return err; len += chunk; } while (slice > 0 && (len < buflen)); return 0; } static int drbg_prepare_hrng(struct drbg_state *drbg) { /* We do not need an HRNG in test mode. */ if (list_empty(&drbg->test_data.list)) return 0; drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0); if (IS_ERR(drbg->jent)) { const int err = PTR_ERR(drbg->jent); drbg->jent = NULL; if (fips_enabled) return err; pr_info("DRBG: Continuing without Jitter RNG\n"); } return 0; } /* * DRBG instantiation function as required by SP800-90A - this function * sets up the DRBG handle, performs the initial seeding and all sanity * checks required by SP800-90A * * @drbg memory of state -- if NULL, new memory is allocated * @pers Personalization string that is mixed into state, may be NULL -- note * the entropy is pulled by the DRBG internally unconditionally * as defined in SP800-90A. The additional input is mixed into * the state in addition to the pulled entropy. * @coreref reference to core * @pr prediction resistance enabled * * return * 0 on success * error value otherwise */ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, int coreref, bool pr) { int ret; bool reseed = true; pr_devel("DRBG: Initializing DRBG core %d with prediction resistance " "%s\n", coreref, str_enabled_disabled(pr)); mutex_lock(&drbg->drbg_mutex); /* 9.1 step 1 is implicit with the selected DRBG type */ /* * 9.1 step 2 is implicit as caller can select prediction resistance * and the flag is copied into drbg->flags -- * all DRBG types support prediction resistance */ /* 9.1 step 4 is implicit in drbg_sec_strength */ if (!drbg->core) { drbg->core = &drbg_cores[coreref]; drbg->pr = pr; drbg->seeded = DRBG_SEED_STATE_UNSEEDED; drbg->last_seed_time = 0; drbg->reseed_threshold = drbg_max_requests(drbg); ret = drbg_alloc_state(drbg); if (ret) goto unlock; ret = drbg_prepare_hrng(drbg); if (ret) goto free_everything; reseed = false; } ret = drbg_seed(drbg, pers, reseed); if (ret && !reseed) goto free_everything; mutex_unlock(&drbg->drbg_mutex); return ret; unlock: mutex_unlock(&drbg->drbg_mutex); return ret; free_everything: mutex_unlock(&drbg->drbg_mutex); drbg_uninstantiate(drbg); return ret; } /* * DRBG uninstantiate function as required by SP800-90A - this function * frees all buffers and the DRBG handle * * @drbg DRBG state handle * * return * 0 on success */ static int drbg_uninstantiate(struct drbg_state *drbg) { if (!IS_ERR_OR_NULL(drbg->jent)) crypto_free_rng(drbg->jent); drbg->jent = NULL; if (drbg->d_ops) drbg->d_ops->crypto_fini(drbg); drbg_dealloc_state(drbg); /* no scrubbing of test_data -- this shall survive an uninstantiate */ return 0; } /* * Helper function for setting the test data in the DRBG * * @drbg DRBG state handle * @data test data * @len test data length */ static void drbg_kcapi_set_entropy(struct crypto_rng *tfm, const u8 *data, unsigned int len) { struct drbg_state *drbg = crypto_rng_ctx(tfm); mutex_lock(&drbg->drbg_mutex); drbg_string_fill(&drbg->test_data, data, len); mutex_unlock(&drbg->drbg_mutex); } /*************************************************************** * Kernel crypto API cipher invocations requested by DRBG ***************************************************************/ #if defined(CONFIG_CRYPTO_DRBG_HASH) || defined(CONFIG_CRYPTO_DRBG_HMAC) struct sdesc { struct shash_desc shash; char ctx[]; }; static int drbg_init_hash_kernel(struct drbg_state *drbg) { struct sdesc *sdesc; struct crypto_shash *tfm; tfm = crypto_alloc_shash(drbg->core->backend_cra_name, 0, 0); if (IS_ERR(tfm)) { pr_info("DRBG: could not allocate digest TFM handle: %s\n", drbg->core->backend_cra_name); return PTR_ERR(tfm); } BUG_ON(drbg_blocklen(drbg) != crypto_shash_digestsize(tfm)); sdesc = kzalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), GFP_KERNEL); if (!sdesc) { crypto_free_shash(tfm); return -ENOMEM; } sdesc->shash.tfm = tfm; drbg->priv_data = sdesc; return 0; } static int drbg_fini_hash_kernel(struct drbg_state *drbg) { struct sdesc *sdesc = drbg->priv_data; if (sdesc) { crypto_free_shash(sdesc->shash.tfm); kfree_sensitive(sdesc); } drbg->priv_data = NULL; return 0; } static void drbg_kcapi_hmacsetkey(struct drbg_state *drbg, const unsigned char *key) { struct sdesc *sdesc = drbg->priv_data; crypto_shash_setkey(sdesc->shash.tfm, key, drbg_statelen(drbg)); } static int drbg_kcapi_hash(struct drbg_state *drbg, unsigned char *outval, const struct list_head *in) { struct sdesc *sdesc = drbg->priv_data; struct drbg_string *input = NULL; crypto_shash_init(&sdesc->shash); list_for_each_entry(input, in, list) crypto_shash_update(&sdesc->shash, input->buf, input->len); return crypto_shash_final(&sdesc->shash, outval); } #endif /* (CONFIG_CRYPTO_DRBG_HASH || CONFIG_CRYPTO_DRBG_HMAC) */ #ifdef CONFIG_CRYPTO_DRBG_CTR static int drbg_fini_sym_kernel(struct drbg_state *drbg) { struct crypto_cipher *tfm = (struct crypto_cipher *)drbg->priv_data; if (tfm) crypto_free_cipher(tfm); drbg->priv_data = NULL; if (drbg->ctr_handle) crypto_free_skcipher(drbg->ctr_handle); drbg->ctr_handle = NULL; if (drbg->ctr_req) skcipher_request_free(drbg->ctr_req); drbg->ctr_req = NULL; kfree(drbg->outscratchpadbuf); drbg->outscratchpadbuf = NULL; return 0; } static int drbg_init_sym_kernel(struct drbg_state *drbg) { struct crypto_cipher *tfm; struct crypto_skcipher *sk_tfm; struct skcipher_request *req; unsigned int alignmask; char ctr_name[CRYPTO_MAX_ALG_NAME]; tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0); if (IS_ERR(tfm)) { pr_info("DRBG: could not allocate cipher TFM handle: %s\n", drbg->core->backend_cra_name); return PTR_ERR(tfm); } BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm)); drbg->priv_data = tfm; if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", drbg->core->backend_cra_name) >= CRYPTO_MAX_ALG_NAME) { drbg_fini_sym_kernel(drbg); return -EINVAL; } sk_tfm = crypto_alloc_skcipher(ctr_name, 0, 0); if (IS_ERR(sk_tfm)) { pr_info("DRBG: could not allocate CTR cipher TFM handle: %s\n", ctr_name); drbg_fini_sym_kernel(drbg); return PTR_ERR(sk_tfm); } drbg->ctr_handle = sk_tfm; crypto_init_wait(&drbg->ctr_wait); req = skcipher_request_alloc(sk_tfm, GFP_KERNEL); if (!req) { pr_info("DRBG: could not allocate request queue\n"); drbg_fini_sym_kernel(drbg); return -ENOMEM; } drbg->ctr_req = req; skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &drbg->ctr_wait); alignmask = crypto_skcipher_alignmask(sk_tfm); drbg->outscratchpadbuf = kmalloc(DRBG_OUTSCRATCHLEN + alignmask, GFP_KERNEL); if (!drbg->outscratchpadbuf) { drbg_fini_sym_kernel(drbg); return -ENOMEM; } drbg->outscratchpad = (u8 *)PTR_ALIGN(drbg->outscratchpadbuf, alignmask + 1); sg_init_table(&drbg->sg_in, 1); sg_init_one(&drbg->sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN); return alignmask; } static void drbg_kcapi_symsetkey(struct drbg_state *drbg, const unsigned char *key) { struct crypto_cipher *tfm = drbg->priv_data; crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg))); } static int drbg_kcapi_sym(struct drbg_state *drbg, unsigned char *outval, const struct drbg_string *in) { struct crypto_cipher *tfm = drbg->priv_data; /* there is only component in *in */ BUG_ON(in->len < drbg_blocklen(drbg)); crypto_cipher_encrypt_one(tfm, outval, in->buf); return 0; } static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, u8 *inbuf, u32 inlen, u8 *outbuf, u32 outlen) { struct scatterlist *sg_in = &drbg->sg_in, *sg_out = &drbg->sg_out; u32 scratchpad_use = min_t(u32, outlen, DRBG_OUTSCRATCHLEN); int ret; if (inbuf) { /* Use caller-provided input buffer */ sg_set_buf(sg_in, inbuf, inlen); } else { /* Use scratchpad for in-place operation */ inlen = scratchpad_use; memset(drbg->outscratchpad, 0, scratchpad_use); sg_set_buf(sg_in, drbg->outscratchpad, scratchpad_use); } while (outlen) { u32 cryptlen = min3(inlen, outlen, (u32)DRBG_OUTSCRATCHLEN); /* Output buffer may not be valid for SGL, use scratchpad */ skcipher_request_set_crypt(drbg->ctr_req, sg_in, sg_out, cryptlen, drbg->V); ret = crypto_wait_req(crypto_skcipher_encrypt(drbg->ctr_req), &drbg->ctr_wait); if (ret) goto out; crypto_init_wait(&drbg->ctr_wait); memcpy(outbuf, drbg->outscratchpad, cryptlen); memzero_explicit(drbg->outscratchpad, cryptlen); outlen -= cryptlen; outbuf += cryptlen; } ret = 0; out: return ret; } #endif /* CONFIG_CRYPTO_DRBG_CTR */ /*************************************************************** * Kernel crypto API interface to register DRBG ***************************************************************/ /* * Look up the DRBG flags by given kernel crypto API cra_name * The code uses the drbg_cores definition to do this * * @cra_name kernel crypto API cra_name * @coreref reference to integer which is filled with the pointer to * the applicable core * @pr reference for setting prediction resistance * * return: flags */ static inline void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, bool *pr) { int i = 0; size_t start = 0; int len = 0; *pr = true; /* disassemble the names */ if (!memcmp(cra_driver_name, "drbg_nopr_", 10)) { start = 10; *pr = false; } else if (!memcmp(cra_driver_name, "drbg_pr_", 8)) { start = 8; } else { return; } /* remove the first part */ len = strlen(cra_driver_name) - start; for (i = 0; ARRAY_SIZE(drbg_cores) > i; i++) { if (!memcmp(cra_driver_name + start, drbg_cores[i].cra_name, len)) { *coreref = i; return; } } } static int drbg_kcapi_init(struct crypto_tfm *tfm) { struct drbg_state *drbg = crypto_tfm_ctx(tfm); mutex_init(&drbg->drbg_mutex); return 0; } static void drbg_kcapi_cleanup(struct crypto_tfm *tfm) { drbg_uninstantiate(crypto_tfm_ctx(tfm)); } /* * Generate random numbers invoked by the kernel crypto API: * The API of the kernel crypto API is extended as follows: * * src is additional input supplied to the RNG. * slen is the length of src. * dst is the output buffer where random data is to be stored. * dlen is the length of dst. */ static int drbg_kcapi_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { struct drbg_state *drbg = crypto_rng_ctx(tfm); struct drbg_string *addtl = NULL; struct drbg_string string; if (slen) { /* linked list variable is now local to allow modification */ drbg_string_fill(&string, src, slen); addtl = &string; } return drbg_generate_long(drbg, dst, dlen, addtl); } /* * Seed the DRBG invoked by the kernel crypto API */ static int drbg_kcapi_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { struct drbg_state *drbg = crypto_rng_ctx(tfm); struct crypto_tfm *tfm_base = crypto_rng_tfm(tfm); bool pr = false; struct drbg_string string; struct drbg_string *seed_string = NULL; int coreref = 0; drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm_base), &coreref, &pr); if (0 < slen) { drbg_string_fill(&string, seed, slen); seed_string = &string; } return drbg_instantiate(drbg, seed_string, coreref, pr); } /*************************************************************** * Kernel module: code to load the module ***************************************************************/ /* * Tests as defined in 11.3.2 in addition to the cipher tests: testing * of the error handling. * * Note: testing of failing seed source as defined in 11.3.2 is not applicable * as seed source of get_random_bytes does not fail. * * Note 2: There is no sensible way of testing the reseed counter * enforcement, so skip it. */ static inline int __init drbg_healthcheck_sanity(void) { int len = 0; #define OUTBUFLEN 16 unsigned char buf[OUTBUFLEN]; struct drbg_state *drbg = NULL; int ret; int rc = -EFAULT; bool pr = false; int coreref = 0; struct drbg_string addtl; size_t max_addtllen, max_request_bytes; /* only perform test in FIPS mode */ if (!fips_enabled) return 0; #ifdef CONFIG_CRYPTO_DRBG_CTR drbg_convert_tfm_core("drbg_nopr_ctr_aes256", &coreref, &pr); #endif #ifdef CONFIG_CRYPTO_DRBG_HASH drbg_convert_tfm_core("drbg_nopr_sha256", &coreref, &pr); #endif #ifdef CONFIG_CRYPTO_DRBG_HMAC drbg_convert_tfm_core("drbg_nopr_hmac_sha512", &coreref, &pr); #endif drbg = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); if (!drbg) return -ENOMEM; mutex_init(&drbg->drbg_mutex); drbg->core = &drbg_cores[coreref]; drbg->reseed_threshold = drbg_max_requests(drbg); /* * if the following tests fail, it is likely that there is a buffer * overflow as buf is much smaller than the requested or provided * string lengths -- in case the error handling does not succeed * we may get an OOPS. And we want to get an OOPS as this is a * grave bug. */ max_addtllen = drbg_max_addtl(drbg); max_request_bytes = drbg_max_request_bytes(drbg); drbg_string_fill(&addtl, buf, max_addtllen + 1); /* overflow addtllen with additonal info string */ len = drbg_generate(drbg, buf, OUTBUFLEN, &addtl); BUG_ON(0 < len); /* overflow max_bits */ len = drbg_generate(drbg, buf, (max_request_bytes + 1), NULL); BUG_ON(0 < len); /* overflow max addtllen with personalization string */ ret = drbg_seed(drbg, &addtl, false); BUG_ON(0 == ret); /* all tests passed */ rc = 0; pr_devel("DRBG: Sanity tests for failure code paths successfully " "completed\n"); kfree(drbg); return rc; } static struct rng_alg drbg_algs[22]; /* * Fill the array drbg_algs used to register the different DRBGs * with the kernel crypto API. To fill the array, the information * from drbg_cores[] is used. */ static inline void __init drbg_fill_array(struct rng_alg *alg, const struct drbg_core *core, int pr) { int pos = 0; static int priority = 200; memcpy(alg->base.cra_name, "stdrng", 6); if (pr) { memcpy(alg->base.cra_driver_name, "drbg_pr_", 8); pos = 8; } else { memcpy(alg->base.cra_driver_name, "drbg_nopr_", 10); pos = 10; } memcpy(alg->base.cra_driver_name + pos, core->cra_name, strlen(core->cra_name)); alg->base.cra_priority = priority; priority++; /* * If FIPS mode enabled, the selected DRBG shall have the * highest cra_priority over other stdrng instances to ensure * it is selected. */ if (fips_enabled) alg->base.cra_priority += 200; alg->base.cra_ctxsize = sizeof(struct drbg_state); alg->base.cra_module = THIS_MODULE; alg->base.cra_init = drbg_kcapi_init; alg->base.cra_exit = drbg_kcapi_cleanup; alg->generate = drbg_kcapi_random; alg->seed = drbg_kcapi_seed; alg->set_ent = drbg_kcapi_set_entropy; alg->seedsize = 0; } static int __init drbg_init(void) { unsigned int i = 0; /* pointer to drbg_algs */ unsigned int j = 0; /* pointer to drbg_cores */ int ret; ret = drbg_healthcheck_sanity(); if (ret) return ret; if (ARRAY_SIZE(drbg_cores) * 2 > ARRAY_SIZE(drbg_algs)) { pr_info("DRBG: Cannot register all DRBG types" "(slots needed: %zu, slots available: %zu)\n", ARRAY_SIZE(drbg_cores) * 2, ARRAY_SIZE(drbg_algs)); return -EFAULT; } /* * each DRBG definition can be used with PR and without PR, thus * we instantiate each DRBG in drbg_cores[] twice. * * As the order of placing them into the drbg_algs array matters * (the later DRBGs receive a higher cra_priority) we register the * prediction resistance DRBGs first as the should not be too * interesting. */ for (j = 0; ARRAY_SIZE(drbg_cores) > j; j++, i++) drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 1); for (j = 0; ARRAY_SIZE(drbg_cores) > j; j++, i++) drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 0); return crypto_register_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); } static void __exit drbg_exit(void) { crypto_unregister_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); } subsys_initcall(drbg_init); module_exit(drbg_exit); #ifndef CRYPTO_DRBG_HASH_STRING #define CRYPTO_DRBG_HASH_STRING "" #endif #ifndef CRYPTO_DRBG_HMAC_STRING #define CRYPTO_DRBG_HMAC_STRING "" #endif #ifndef CRYPTO_DRBG_CTR_STRING #define CRYPTO_DRBG_CTR_STRING "" #endif MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>"); MODULE_DESCRIPTION("NIST SP800-90A Deterministic Random Bit Generator (DRBG) " "using following cores: " CRYPTO_DRBG_HASH_STRING CRYPTO_DRBG_HMAC_STRING CRYPTO_DRBG_CTR_STRING); MODULE_ALIAS_CRYPTO("stdrng"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
17 17 10 10 12 12 12 3 3 5 12 12 12 12 12 12 8 7 12 12 9 9 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2014 Nicira, Inc. */ #include <linux/etherdevice.h> #include <linux/if.h> #include <linux/if_vlan.h> #include <linux/jhash.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/compat.h> #include <net/net_namespace.h> #include <linux/module.h> #include "datapath.h" #include "vport.h" #include "vport-internal_dev.h" static LIST_HEAD(vport_ops_list); /* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 /** * ovs_vport_init - initialize vport subsystem * * Called at module load time to initialize the vport subsystem. */ int ovs_vport_init(void) { dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head), GFP_KERNEL); if (!dev_table) return -ENOMEM; return 0; } /** * ovs_vport_exit - shutdown vport subsystem * * Called at module exit time to shutdown the vport subsystem. */ void ovs_vport_exit(void) { kfree(dev_table); } static struct hlist_head *hash_bucket(const struct net *net, const char *name) { unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } int __ovs_vport_ops_register(struct vport_ops *ops) { int err = -EEXIST; struct vport_ops *o; ovs_lock(); list_for_each_entry(o, &vport_ops_list, list) if (ops->type == o->type) goto errout; list_add_tail(&ops->list, &vport_ops_list); err = 0; errout: ovs_unlock(); return err; } EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); void ovs_vport_ops_unregister(struct vport_ops *ops) { ovs_lock(); list_del(&ops->list); ovs_unlock(); } EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** * ovs_vport_locate - find a port that has already been created * * @net: network namespace * @name: name of port to find * * Must be called with ovs or RCU read lock. */ struct vport *ovs_vport_locate(const struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; hlist_for_each_entry_rcu(vport, bucket, hash_node, lockdep_ovsl_is_held()) if (!strcmp(name, ovs_vport_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; return NULL; } /** * ovs_vport_alloc - allocate and initialize new vport * * @priv_size: Size of private data area to allocate. * @ops: vport device ops * @parms: information about new vport. * * Allocate and initialize a new vport defined by @ops. The vport will contain * a private data area of size @priv_size that can be accessed using * vport_priv(). Some parameters of the vport will be initialized from @parms. * @vports that are no longer needed should be released with * vport_free(). */ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, const struct vport_parms *parms) { struct vport *vport; size_t alloc_size; int err; alloc_size = sizeof(struct vport); if (priv_size) { alloc_size = ALIGN(alloc_size, VPORT_ALIGN); alloc_size += priv_size; } vport = kzalloc(alloc_size, GFP_KERNEL); if (!vport) return ERR_PTR(-ENOMEM); vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); if (!vport->upcall_stats) { err = -ENOMEM; goto err_kfree_vport; } vport->dp = parms->dp; vport->port_no = parms->port_no; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { err = -EINVAL; goto err_free_percpu; } return vport; err_free_percpu: free_percpu(vport->upcall_stats); err_kfree_vport: kfree(vport); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ovs_vport_alloc); /** * ovs_vport_free - uninitialize and free vport * * @vport: vport to free * * Frees a vport allocated with vport_alloc() when it is no longer needed. * * The caller must ensure that an RCU grace period has passed since the last * time @vport was in a datapath. */ void ovs_vport_free(struct vport *vport) { /* vport is freed from RCU callback or error path, Therefore * it is safe to use raw dereference. */ kfree(rcu_dereference_raw(vport->upcall_portids)); free_percpu(vport->upcall_stats); kfree(vport); } EXPORT_SYMBOL_GPL(ovs_vport_free); static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms) { struct vport_ops *ops; list_for_each_entry(ops, &vport_ops_list, list) if (ops->type == parms->type) return ops; return NULL; } /** * ovs_vport_add - add vport device (for kernel callers) * * @parms: Information about new vport. * * Creates a new vport with the specified configuration (which is dependent on * device type). ovs_mutex must be held. */ struct vport *ovs_vport_add(const struct vport_parms *parms) { struct vport_ops *ops; struct vport *vport; ops = ovs_vport_lookup(parms); if (ops) { struct hlist_head *bucket; if (!try_module_get(ops->owner)) return ERR_PTR(-EAFNOSUPPORT); vport = ops->create(parms); if (IS_ERR(vport)) { module_put(ops->owner); return vport; } bucket = hash_bucket(ovs_dp_get_net(vport->dp), ovs_vport_name(vport)); hlist_add_head_rcu(&vport->hash_node, bucket); return vport; } /* Unlock to attempt module load and return -EAGAIN if load * was successful as we need to restart the port addition * workflow. */ ovs_unlock(); request_module("vport-type-%d", parms->type); ovs_lock(); if (!ovs_vport_lookup(parms)) return ERR_PTR(-EAFNOSUPPORT); else return ERR_PTR(-EAGAIN); } /** * ovs_vport_set_options - modify existing vport device (for kernel callers) * * @vport: vport to modify. * @options: New configuration. * * Modifies an existing device with the specified configuration (which is * dependent on device type). ovs_mutex must be held. */ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) { if (!vport->ops->set_options) return -EOPNOTSUPP; return vport->ops->set_options(vport, options); } /** * ovs_vport_del - delete existing vport device * * @vport: vport to delete. * * Detaches @vport from its datapath and destroys it. ovs_mutex must * be held. */ void ovs_vport_del(struct vport *vport) { hlist_del_rcu(&vport->hash_node); module_put(vport->ops->owner); vport->ops->destroy(vport); } /** * ovs_vport_get_stats - retrieve device stats * * @vport: vport from which to retrieve the stats * @stats: location to store stats * * Retrieves transmit, receive, and error stats for the given device. * * Must be called with ovs_mutex or rcu_read_lock. */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { const struct rtnl_link_stats64 *dev_stats; struct rtnl_link_stats64 temp; dev_stats = dev_get_stats(vport->dev, &temp); stats->rx_errors = dev_stats->rx_errors; stats->tx_errors = dev_stats->tx_errors; stats->tx_dropped = dev_stats->tx_dropped; stats->rx_dropped = dev_stats->rx_dropped; stats->rx_bytes = dev_stats->rx_bytes; stats->rx_packets = dev_stats->rx_packets; stats->tx_bytes = dev_stats->tx_bytes; stats->tx_packets = dev_stats->tx_packets; } /** * ovs_vport_get_upcall_stats - retrieve upcall stats * * @vport: vport from which to retrieve the stats. * @skb: sk_buff where upcall stats should be appended. * * Retrieves upcall stats for the given device. * * Must be called with ovs_mutex or rcu_read_lock. */ int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; int i; __u64 tx_success = 0; __u64 tx_fail = 0; for_each_possible_cpu(i) { const struct vport_upcall_stats_percpu *stats; unsigned int start; stats = per_cpu_ptr(vport->upcall_stats, i); do { start = u64_stats_fetch_begin(&stats->syncp); tx_success += u64_stats_read(&stats->n_success); tx_fail += u64_stats_read(&stats->n_fail); } while (u64_stats_fetch_retry(&stats->syncp, start)); } nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS); if (!nla) return -EMSGSIZE; if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success, OVS_VPORT_ATTR_PAD)) { nla_nest_cancel(skb, nla); return -EMSGSIZE; } if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail, OVS_VPORT_ATTR_PAD)) { nla_nest_cancel(skb, nla); return -EMSGSIZE; } nla_nest_end(skb, nla); return 0; } /** * ovs_vport_get_options - retrieve device options * * @vport: vport from which to retrieve the options. * @skb: sk_buff where options should be appended. * * Retrieves the configuration of the given device, appending an * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested * vport-specific attributes to @skb. * * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another * negative error code if a real error occurred. If an error occurs, @skb is * left unmodified. * * Must be called with ovs_mutex or rcu_read_lock. */ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; int err; if (!vport->ops->get_options) return 0; nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; err = vport->ops->get_options(vport, skb); if (err) { nla_nest_cancel(skb, nla); return err; } nla_nest_end(skb, nla); return 0; } /** * ovs_vport_set_upcall_portids - set upcall portids of @vport. * * @vport: vport to modify. * @ids: new configuration, an array of port ids. * * Sets the vport's upcall_portids to @ids. * * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed * as an array of U32. * * Must be called with ovs_mutex. */ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) { struct vport_portids *old, *vport_portids; if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) return -EINVAL; old = ovsl_dereference(vport->upcall_portids); vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), GFP_KERNEL); if (!vport_portids) return -ENOMEM; vport_portids->n_ids = nla_len(ids) / sizeof(u32); vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); nla_memcpy(vport_portids->ids, ids, nla_len(ids)); rcu_assign_pointer(vport->upcall_portids, vport_portids); if (old) kfree_rcu(old, rcu); return 0; } /** * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. * * @vport: vport from which to retrieve the portids. * @skb: sk_buff where portids should be appended. * * Retrieves the configuration of the given vport, appending the * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall * portids to @skb. * * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. * If an error occurs, @skb is left unmodified. Must be called with * ovs_mutex or rcu_read_lock. */ int ovs_vport_get_upcall_portids(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; ids = rcu_dereference_ovsl(vport->upcall_portids); if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->n_ids * sizeof(u32), (void *)ids->ids); else return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); } /** * ovs_vport_find_upcall_portid - find the upcall portid to send upcall. * * @vport: vport from which the missed packet is received. * @skb: skb that the missed packet was received. * * Uses the skb_get_hash() to select the upcall portid to send the * upcall. * * Returns the portid of the target socket. Must be called with rcu_read_lock. */ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; u32 ids_index; u32 hash; ids = rcu_dereference(vport->upcall_portids); /* If there is only one portid, select it in the fast-path. */ if (ids->n_ids == 1) return ids->ids[0]; hash = skb_get_hash(skb); ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); return ids->ids[ids_index]; } /** * ovs_vport_receive - pass up received packet to the datapath for processing * * @vport: vport that received the packet * @skb: skb that was received * @tun_info: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, const struct ip_tunnel_info *tun_info) { struct sw_flow_key key; int error; OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; mark = skb->mark; skb_scrub_packet(skb, true); skb->mark = mark; tun_info = NULL; } /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { kfree_skb(skb); return error; } ovs_dp_process_packet(skb, &key); return 0; } static int packet_length(const struct sk_buff *skb, struct net_device *dev) { int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) length -= VLAN_HLEN; /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none * account for 802.1ad. e.g. is_skb_forwardable(). */ return length > 0 ? length : 0; } void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) { int mtu = vport->dev->mtu; switch (vport->dev->type) { case ARPHRD_NONE: if (mac_proto == MAC_PROTO_ETHERNET) { skb_reset_network_header(skb); skb_reset_mac_len(skb); skb->protocol = htons(ETH_P_TEB); } else if (mac_proto != MAC_PROTO_NONE) { WARN_ON_ONCE(1); goto drop; } break; case ARPHRD_ETHER: if (mac_proto != MAC_PROTO_ETHERNET) goto drop; break; default: goto drop; } if (unlikely(packet_length(skb, vport->dev) > mtu && !skb_is_gso(skb))) { vport->dev->stats.tx_errors++; if (vport->dev->flags & IFF_UP) net_warn_ratelimited("%s: dropped over-mtu packet: " "%d > %d\n", vport->dev->name, packet_length(skb, vport->dev), mtu); goto drop; } skb->dev = vport->dev; skb_clear_tstamp(skb); vport->ops->send(skb); return; drop: kfree_skb(skb); }
70 6 5 77 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM snd_pcm #define TRACE_INCLUDE_FILE pcm_trace #if !defined(_PCM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _PCM_TRACE_H #include <linux/tracepoint.h> TRACE_EVENT(hwptr, TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_uframes_t pos, bool irq), TP_ARGS(substream, pos, irq), TP_STRUCT__entry( __field( bool, in_interrupt ) __field( unsigned int, card ) __field( unsigned int, device ) __field( unsigned int, number ) __field( unsigned int, stream ) __field( snd_pcm_uframes_t, pos ) __field( snd_pcm_uframes_t, period_size ) __field( snd_pcm_uframes_t, buffer_size ) __field( snd_pcm_uframes_t, old_hw_ptr ) __field( snd_pcm_uframes_t, hw_ptr_base ) ), TP_fast_assign( __entry->in_interrupt = (irq); __entry->card = (substream)->pcm->card->number; __entry->device = (substream)->pcm->device; __entry->number = (substream)->number; __entry->stream = (substream)->stream; __entry->pos = (pos); __entry->period_size = (substream)->runtime->period_size; __entry->buffer_size = (substream)->runtime->buffer_size; __entry->old_hw_ptr = (substream)->runtime->status->hw_ptr; __entry->hw_ptr_base = (substream)->runtime->hw_ptr_base; ), TP_printk("pcmC%dD%d%s/sub%d: %s: pos=%lu, old=%lu, base=%lu, period=%lu, buf=%lu", __entry->card, __entry->device, __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c", __entry->number, __entry->in_interrupt ? "IRQ" : "POS", (unsigned long)__entry->pos, (unsigned long)__entry->old_hw_ptr, (unsigned long)__entry->hw_ptr_base, (unsigned long)__entry->period_size, (unsigned long)__entry->buffer_size) ); TRACE_EVENT(xrun, TP_PROTO(struct snd_pcm_substream *substream), TP_ARGS(substream), TP_STRUCT__entry( __field( unsigned int, card ) __field( unsigned int, device ) __field( unsigned int, number ) __field( unsigned int, stream ) __field( snd_pcm_uframes_t, period_size ) __field( snd_pcm_uframes_t, buffer_size ) __field( snd_pcm_uframes_t, old_hw_ptr ) __field( snd_pcm_uframes_t, hw_ptr_base ) ), TP_fast_assign( __entry->card = (substream)->pcm->card->number; __entry->device = (substream)->pcm->device; __entry->number = (substream)->number; __entry->stream = (substream)->stream; __entry->period_size = (substream)->runtime->period_size; __entry->buffer_size = (substream)->runtime->buffer_size; __entry->old_hw_ptr = (substream)->runtime->status->hw_ptr; __entry->hw_ptr_base = (substream)->runtime->hw_ptr_base; ), TP_printk("pcmC%dD%d%s/sub%d: XRUN: old=%lu, base=%lu, period=%lu, buf=%lu", __entry->card, __entry->device, __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c", __entry->number, (unsigned long)__entry->old_hw_ptr, (unsigned long)__entry->hw_ptr_base, (unsigned long)__entry->period_size, (unsigned long)__entry->buffer_size) ); TRACE_EVENT(hw_ptr_error, TP_PROTO(struct snd_pcm_substream *substream, const char *why), TP_ARGS(substream, why), TP_STRUCT__entry( __field( unsigned int, card ) __field( unsigned int, device ) __field( unsigned int, number ) __field( unsigned int, stream ) __string( reason, why ) ), TP_fast_assign( __entry->card = (substream)->pcm->card->number; __entry->device = (substream)->pcm->device; __entry->number = (substream)->number; __entry->stream = (substream)->stream; __assign_str(reason); ), TP_printk("pcmC%dD%d%s/sub%d: ERROR: %s", __entry->card, __entry->device, __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c", __entry->number, __get_str(reason)) ); TRACE_EVENT(applptr, TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_uframes_t prev, snd_pcm_uframes_t curr), TP_ARGS(substream, prev, curr), TP_STRUCT__entry( __field( unsigned int, card ) __field( unsigned int, device ) __field( unsigned int, number ) __field( unsigned int, stream ) __field( snd_pcm_uframes_t, prev ) __field( snd_pcm_uframes_t, curr ) __field( snd_pcm_uframes_t, avail ) __field( snd_pcm_uframes_t, period_size ) __field( snd_pcm_uframes_t, buffer_size ) ), TP_fast_assign( __entry->card = (substream)->pcm->card->number; __entry->device = (substream)->pcm->device; __entry->number = (substream)->number; __entry->stream = (substream)->stream; __entry->prev = (prev); __entry->curr = (curr); __entry->avail = (substream)->stream ? snd_pcm_capture_avail(substream->runtime) : snd_pcm_playback_avail(substream->runtime); __entry->period_size = (substream)->runtime->period_size; __entry->buffer_size = (substream)->runtime->buffer_size; ), TP_printk("pcmC%dD%d%s/sub%d: prev=%lu, curr=%lu, avail=%lu, period=%lu, buf=%lu", __entry->card, __entry->device, __entry->stream ? "c" : "p", __entry->number, __entry->prev, __entry->curr, __entry->avail, __entry->period_size, __entry->buffer_size ) ); #endif /* _PCM_TRACE_H */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #include <trace/define_trace.h>
3 5 4 3 2 3 3 1 2 2 1 9 9 2 8 8 8 1 7 7 7 7 6 3 5 7 5 5 5 7 7 3 3 3 3 3 3 99 43 64 64 100 9 6 1 40 35 35 9 1 17 8 15 7 7 6 6 10 10 8 7 7 7 1 5 5 5 42 4 4 5 1 2 97 3 96 2 2 2 16 2 2 14 6 8 7 3 8 7 4 14 11 4 14 2 1 2 14 4 14 20 20 20 13 13 5 4 4 11 19 18 17 3 17 17 15 16 3 3 16 16 16 16 14 16 16 16 16 3 19 81 25 5 77 25 24 25 25 25 4 3 2 1 77 81 3 4 3 3 3 3 3 3 3 3 1 4 4 15 10 14 50 14 1 1 1 1 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Copyright (C) Christoph Hellwig, 2002 */ #include <linux/capability.h> #include <linux/fs.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> #include <linux/slab.h> #include <linux/quotaops.h> #include <linux/security.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" #include "jfs_debug.h" #include "jfs_dinode.h" #include "jfs_extent.h" #include "jfs_metapage.h" #include "jfs_xattr.h" #include "jfs_acl.h" /* * jfs_xattr.c: extended attribute service * * Overall design -- * * Format: * * Extended attribute lists (jfs_ea_list) consist of an overall size (32 bit * value) and a variable (0 or more) number of extended attribute * entries. Each extended attribute entry (jfs_ea) is a <name,value> double * where <name> is constructed from a null-terminated ascii string * (1 ... 255 bytes in the name) and <value> is arbitrary 8 bit data * (1 ... 65535 bytes). The in-memory format is * * 0 1 2 4 4 + namelen + 1 * +-------+--------+--------+----------------+-------------------+ * | Flags | Name | Value | Name String \0 | Data . . . . | * | | Length | Length | | | * +-------+--------+--------+----------------+-------------------+ * * A jfs_ea_list then is structured as * * 0 4 4 + EA_SIZE(ea1) * +------------+-------------------+--------------------+----- * | Overall EA | First FEA Element | Second FEA Element | ..... * | List Size | | | * +------------+-------------------+--------------------+----- * * On-disk: * * FEALISTs are stored on disk using blocks allocated by dbAlloc() and * written directly. An EA list may be in-lined in the inode if there is * sufficient room available. */ struct ea_buffer { int flag; /* Indicates what storage xattr points to */ int max_size; /* largest xattr that fits in current buffer */ dxd_t new_ea; /* dxd to replace ea when modifying xattr */ struct metapage *mp; /* metapage containing ea list */ struct jfs_ea_list *xattr; /* buffer containing ea list */ }; /* * ea_buffer.flag values */ #define EA_INLINE 0x0001 #define EA_EXTENT 0x0002 #define EA_NEW 0x0004 #define EA_MALLOC 0x0008 /* * Mapping of on-disk attribute names: for on-disk attribute names with an * unknown prefix (not "system.", "user.", "security.", or "trusted."), the * prefix "os2." is prepended. On the way back to disk, "os2." prefixes are * stripped and we make sure that the remaining name does not start with one * of the know prefixes. */ static int is_known_namespace(const char *name) { if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) return false; return true; } static inline int name_size(struct jfs_ea *ea) { if (is_known_namespace(ea->name)) return ea->namelen; else return ea->namelen + XATTR_OS2_PREFIX_LEN; } static inline int copy_name(char *buffer, struct jfs_ea *ea) { int len = ea->namelen; if (!is_known_namespace(ea->name)) { memcpy(buffer, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN); buffer += XATTR_OS2_PREFIX_LEN; len += XATTR_OS2_PREFIX_LEN; } memcpy(buffer, ea->name, ea->namelen); buffer[ea->namelen] = 0; return len; } /* Forward references */ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf); /* * NAME: ea_write_inline * * FUNCTION: Attempt to write an EA inline if area is available * * PRE CONDITIONS: * Already verified that the specified EA is small enough to fit inline * * PARAMETERS: * ip - Inode pointer * ealist - EA list pointer * size - size of ealist in bytes * ea - dxd_t structure to be filled in with necessary EA information * if we successfully copy the EA inline * * NOTES: * Checks if the inode's inline area is available. If so, copies EA inline * and sets <ea> fields appropriately. Otherwise, returns failure, EA will * have to be put into an extent. * * RETURNS: 0 for successful copy to inline area; -1 if area not available */ static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist, int size, dxd_t * ea) { struct jfs_inode_info *ji = JFS_IP(ip); /* * Make sure we have an EA -- the NULL EA list is valid, but you * can't copy it! */ if (ealist && size > sizeof (struct jfs_ea_list)) { assert(size <= sizeof (ji->i_inline_ea)); /* * See if the space is available or if it is already being * used for an inline EA. */ if (!(ji->mode2 & INLINEEA) && !(ji->ea.flag & DXD_INLINE)) return -EPERM; DXDsize(ea, size); DXDlength(ea, 0); DXDaddress(ea, 0); memcpy(ji->i_inline_ea, ealist, size); ea->flag = DXD_INLINE; ji->mode2 &= ~INLINEEA; } else { ea->flag = 0; DXDsize(ea, 0); DXDlength(ea, 0); DXDaddress(ea, 0); /* Free up INLINE area */ if (ji->ea.flag & DXD_INLINE) ji->mode2 |= INLINEEA; } return 0; } /* * NAME: ea_write * * FUNCTION: Write an EA for an inode * * PRE CONDITIONS: EA has been verified * * PARAMETERS: * ip - Inode pointer * ealist - EA list pointer * size - size of ealist in bytes * ea - dxd_t structure to be filled in appropriately with where the * EA was copied * * NOTES: Will write EA inline if able to, otherwise allocates blocks for an * extent and synchronously writes it to those blocks. * * RETURNS: 0 for success; Anything else indicates failure */ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, dxd_t * ea) { struct super_block *sb = ip->i_sb; struct jfs_inode_info *ji = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(sb); int nblocks; s64 blkno; int rc = 0, i; char *cp; s32 nbytes, nb; s32 bytes_to_write; struct metapage *mp; /* * Quick check to see if this is an in-linable EA. Short EAs * and empty EAs are all in-linable, provided the space exists. */ if (!ealist || size <= sizeof (ji->i_inline_ea)) { if (!ea_write_inline(ip, ealist, size, ea)) return 0; } /* figure out how many blocks we need */ nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; /* Allocate new blocks to quota. */ rc = dquot_alloc_block(ip, nblocks); if (rc) return rc; rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { /*Rollback quota allocation. */ dquot_free_block(ip, nblocks); return rc; } /* * Now have nblocks worth of storage to stuff into the FEALIST. * loop over the FEALIST copying data into the buffer one page at * a time. */ cp = (char *) ealist; nbytes = size; for (i = 0; i < nblocks; i += sbi->nbperpage) { /* * Determine how many bytes for this request, and round up to * the nearest aggregate block size */ nb = min(PSIZE, nbytes); bytes_to_write = ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits)) << sb->s_blocksize_bits; if (!(mp = get_metapage(ip, blkno + i, bytes_to_write, 1))) { rc = -EIO; goto failed; } memcpy(mp->data, cp, nb); /* * We really need a way to propagate errors for * forced writes like this one. --hch * * (__write_metapage => release_metapage => flush_metapage) */ #ifdef _JFS_FIXME if ((rc = flush_metapage(mp))) { /* * the write failed -- this means that the buffer * is still assigned and the blocks are not being * used. this seems like the best error recovery * we can get ... */ goto failed; } #else flush_metapage(mp); #endif cp += PSIZE; nbytes -= nb; } ea->flag = DXD_EXTENT; DXDsize(ea, le32_to_cpu(ealist->size)); DXDlength(ea, nblocks); DXDaddress(ea, blkno); /* Free up INLINE area */ if (ji->ea.flag & DXD_INLINE) ji->mode2 |= INLINEEA; return 0; failed: /* Rollback quota allocation. */ dquot_free_block(ip, nblocks); dbFree(ip, blkno, nblocks); return rc; } /* * NAME: ea_read_inline * * FUNCTION: Read an inlined EA into user's buffer * * PARAMETERS: * ip - Inode pointer * ealist - Pointer to buffer to fill in with EA * * RETURNS: 0 */ static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist) { struct jfs_inode_info *ji = JFS_IP(ip); int ea_size = sizeDXD(&ji->ea); if (ea_size == 0) { ealist->size = 0; return 0; } /* Sanity Check */ if ((sizeDXD(&ji->ea) > sizeof (ji->i_inline_ea))) return -EIO; if (le32_to_cpu(((struct jfs_ea_list *) &ji->i_inline_ea)->size) != ea_size) return -EIO; memcpy(ealist, ji->i_inline_ea, ea_size); return 0; } /* * NAME: ea_read * * FUNCTION: copy EA data into user's buffer * * PARAMETERS: * ip - Inode pointer * ealist - Pointer to buffer to fill in with EA * * NOTES: If EA is inline calls ea_read_inline() to copy EA. * * RETURNS: 0 for success; other indicates failure */ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) { struct super_block *sb = ip->i_sb; struct jfs_inode_info *ji = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(sb); int nblocks; s64 blkno; char *cp = (char *) ealist; int i; int nbytes, nb; s32 bytes_to_read; struct metapage *mp; /* quick check for in-line EA */ if (ji->ea.flag & DXD_INLINE) return ea_read_inline(ip, ealist); nbytes = sizeDXD(&ji->ea); if (!nbytes) { jfs_error(sb, "nbytes is 0\n"); return -EIO; } /* * Figure out how many blocks were allocated when this EA list was * originally written to disk. */ nblocks = lengthDXD(&ji->ea) << sbi->l2nbperpage; blkno = addressDXD(&ji->ea) << sbi->l2nbperpage; /* * I have found the disk blocks which were originally used to store * the FEALIST. now i loop over each contiguous block copying the * data into the buffer. */ for (i = 0; i < nblocks; i += sbi->nbperpage) { /* * Determine how many bytes for this request, and round up to * the nearest aggregate block size */ nb = min(PSIZE, nbytes); bytes_to_read = ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits)) << sb->s_blocksize_bits; if (!(mp = read_metapage(ip, blkno + i, bytes_to_read, 1))) return -EIO; memcpy(cp, mp->data, nb); release_metapage(mp); cp += PSIZE; nbytes -= nb; } return 0; } /* * NAME: ea_get * * FUNCTION: Returns buffer containing existing extended attributes. * The size of the buffer will be the larger of the existing * attributes size, or min_size. * * The buffer, which may be inlined in the inode or in the * page cache must be release by calling ea_release or ea_put * * PARAMETERS: * inode - Inode pointer * ea_buf - Structure to be populated with ealist and its metadata * min_size- minimum size of buffer to be returned * * RETURNS: 0 for success; Other indicates failure */ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) { struct jfs_inode_info *ji = JFS_IP(inode); struct super_block *sb = inode->i_sb; int size; int ea_size = sizeDXD(&ji->ea); int blocks_needed, current_blocks; s64 blkno; int rc; int quota_allocation = 0; memset(&ea_buf->new_ea, 0, sizeof(ea_buf->new_ea)); /* When fsck.jfs clears a bad ea, it doesn't clear the size */ if (ji->ea.flag == 0) ea_size = 0; if (ea_size == 0) { if (min_size == 0) { ea_buf->flag = 0; ea_buf->max_size = 0; ea_buf->xattr = NULL; return 0; } if ((min_size <= sizeof (ji->i_inline_ea)) && (ji->mode2 & INLINEEA)) { ea_buf->flag = EA_INLINE | EA_NEW; ea_buf->max_size = sizeof (ji->i_inline_ea); ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea; DXDlength(&ea_buf->new_ea, 0); DXDaddress(&ea_buf->new_ea, 0); ea_buf->new_ea.flag = DXD_INLINE; DXDsize(&ea_buf->new_ea, min_size); return 0; } current_blocks = 0; } else if (ji->ea.flag & DXD_INLINE) { if (min_size <= sizeof (ji->i_inline_ea)) { ea_buf->flag = EA_INLINE; ea_buf->max_size = sizeof (ji->i_inline_ea); ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea; goto size_check; } current_blocks = 0; } else { if (!(ji->ea.flag & DXD_EXTENT)) { jfs_error(sb, "invalid ea.flag\n"); return -EIO; } current_blocks = (ea_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } size = max(min_size, ea_size); if (size > PSIZE) { /* * To keep the rest of the code simple. Allocate a * contiguous buffer to work with. Make the buffer large * enough to make use of the whole extent. */ ea_buf->max_size = (size + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL); if (ea_buf->xattr == NULL) return -ENOMEM; ea_buf->flag = EA_MALLOC; if (ea_size == 0) return 0; if ((rc = ea_read(inode, ea_buf->xattr))) { kfree(ea_buf->xattr); ea_buf->xattr = NULL; return rc; } goto size_check; } blocks_needed = (min_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; if (blocks_needed > current_blocks) { /* Allocate new blocks to quota. */ rc = dquot_alloc_block(inode, blocks_needed); if (rc) return -EDQUOT; quota_allocation = blocks_needed; rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, &blkno); if (rc) goto clean_up; DXDlength(&ea_buf->new_ea, blocks_needed); DXDaddress(&ea_buf->new_ea, blkno); ea_buf->new_ea.flag = DXD_EXTENT; DXDsize(&ea_buf->new_ea, min_size); ea_buf->flag = EA_EXTENT | EA_NEW; ea_buf->mp = get_metapage(inode, blkno, blocks_needed << sb->s_blocksize_bits, 1); if (ea_buf->mp == NULL) { dbFree(inode, blkno, (s64) blocks_needed); rc = -EIO; goto clean_up; } ea_buf->xattr = ea_buf->mp->data; ea_buf->max_size = (min_size + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); if (ea_size == 0) return 0; if ((rc = ea_read(inode, ea_buf->xattr))) { discard_metapage(ea_buf->mp); dbFree(inode, blkno, (s64) blocks_needed); goto clean_up; } goto size_check; } ea_buf->flag = EA_EXTENT; ea_buf->mp = read_metapage(inode, addressDXD(&ji->ea), lengthDXD(&ji->ea) << sb->s_blocksize_bits, 1); if (ea_buf->mp == NULL) { rc = -EIO; goto clean_up; } ea_buf->xattr = ea_buf->mp->data; ea_buf->max_size = (ea_size + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); printk(KERN_ERR "ea_get: invalid extended attribute\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, ea_buf->xattr, size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; } return ea_size; clean_up: /* Rollback quota allocation */ if (quota_allocation) dquot_free_block(inode, quota_allocation); return (rc); } static void ea_release(struct inode *inode, struct ea_buffer *ea_buf) { if (ea_buf->flag & EA_MALLOC) kfree(ea_buf->xattr); else if (ea_buf->flag & EA_EXTENT) { assert(ea_buf->mp); release_metapage(ea_buf->mp); if (ea_buf->flag & EA_NEW) dbFree(inode, addressDXD(&ea_buf->new_ea), lengthDXD(&ea_buf->new_ea)); } } static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, int new_size) { struct jfs_inode_info *ji = JFS_IP(inode); unsigned long old_blocks, new_blocks; int rc = 0; if (new_size == 0) { ea_release(inode, ea_buf); ea_buf = NULL; } else if (ea_buf->flag & EA_INLINE) { assert(new_size <= sizeof (ji->i_inline_ea)); ji->mode2 &= ~INLINEEA; ea_buf->new_ea.flag = DXD_INLINE; DXDsize(&ea_buf->new_ea, new_size); DXDaddress(&ea_buf->new_ea, 0); DXDlength(&ea_buf->new_ea, 0); } else if (ea_buf->flag & EA_MALLOC) { rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea); kfree(ea_buf->xattr); } else if (ea_buf->flag & EA_NEW) { /* We have already allocated a new dxd */ flush_metapage(ea_buf->mp); } else { /* ->xattr must point to original ea's metapage */ rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea); discard_metapage(ea_buf->mp); } if (rc) return rc; old_blocks = new_blocks = 0; if (ji->ea.flag & DXD_EXTENT) { invalidate_dxd_metapages(inode, ji->ea); old_blocks = lengthDXD(&ji->ea); } if (ea_buf) { txEA(tid, inode, &ji->ea, &ea_buf->new_ea); if (ea_buf->new_ea.flag & DXD_EXTENT) { new_blocks = lengthDXD(&ea_buf->new_ea); if (ji->ea.flag & DXD_INLINE) ji->mode2 |= INLINEEA; } ji->ea = ea_buf->new_ea; } else { txEA(tid, inode, &ji->ea, NULL); if (ji->ea.flag & DXD_INLINE) ji->mode2 |= INLINEEA; ji->ea.flag = 0; ji->ea.size = 0; } /* If old blocks exist, they must be removed from quota allocation. */ if (old_blocks) dquot_free_block(inode, old_blocks); inode_set_ctime_current(inode); return 0; } int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, const void *value, size_t value_len, int flags) { struct jfs_ea_list *ealist; struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL; struct ea_buffer ea_buf; int old_ea_size = 0; int xattr_size; int new_size; int namelen = strlen(name); int found = 0; int rc; int length; down_write(&JFS_IP(inode)->xattr_sem); xattr_size = ea_get(inode, &ea_buf, 0); if (xattr_size < 0) { rc = xattr_size; goto out; } again: ealist = (struct jfs_ea_list *) ea_buf.xattr; new_size = sizeof (struct jfs_ea_list); if (xattr_size) { for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { if ((namelen == ea->namelen) && (memcmp(name, ea->name, namelen) == 0)) { found = 1; if (flags & XATTR_CREATE) { rc = -EEXIST; goto release; } old_ea = ea; old_ea_size = EA_SIZE(ea); next_ea = NEXT_EA(ea); } else new_size += EA_SIZE(ea); } } if (!found) { if (flags & XATTR_REPLACE) { rc = -ENODATA; goto release; } if (value == NULL) { rc = 0; goto release; } } if (value) new_size += sizeof (struct jfs_ea) + namelen + 1 + value_len; if (new_size > ea_buf.max_size) { /* * We need to allocate more space for merged ea list. * We should only have loop to again: once. */ ea_release(inode, &ea_buf); xattr_size = ea_get(inode, &ea_buf, new_size); if (xattr_size < 0) { rc = xattr_size; goto out; } goto again; } /* Remove old ea of the same name */ if (found) { /* number of bytes following target EA */ length = (char *) END_EALIST(ealist) - (char *) next_ea; if (length > 0) memmove(old_ea, next_ea, length); xattr_size -= old_ea_size; } /* Add new entry to the end */ if (value) { if (xattr_size == 0) /* Completely new ea list */ xattr_size = sizeof (struct jfs_ea_list); /* * The size of EA value is limitted by on-disk format up to * __le16, there would be an overflow if the size is equal * to XATTR_SIZE_MAX (65536). In order to avoid this issue, * we can pre-checkup the value size against USHRT_MAX, and * return -E2BIG in this case, which is consistent with the * VFS setxattr interface. */ if (value_len >= USHRT_MAX) { rc = -E2BIG; goto release; } ea = (struct jfs_ea *) ((char *) ealist + xattr_size); ea->flag = 0; ea->namelen = namelen; ea->valuelen = (cpu_to_le16(value_len)); memcpy(ea->name, name, namelen); ea->name[namelen] = 0; if (value_len) memcpy(&ea->name[namelen + 1], value, value_len); xattr_size += EA_SIZE(ea); } /* DEBUG - If we did this right, these number match */ if (xattr_size != new_size) { printk(KERN_ERR "__jfs_setxattr: xattr_size = %d, new_size = %d\n", xattr_size, new_size); rc = -EINVAL; goto release; } /* * If we're left with an empty list, there's no ea */ if (new_size == sizeof (struct jfs_ea_list)) new_size = 0; ealist->size = cpu_to_le32(new_size); rc = ea_put(tid, inode, &ea_buf, new_size); goto out; release: ea_release(inode, &ea_buf); out: up_write(&JFS_IP(inode)->xattr_sem); return rc; } ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, size_t buf_size) { struct jfs_ea_list *ealist; struct jfs_ea *ea, *ealist_end; struct ea_buffer ea_buf; int xattr_size; ssize_t size; int namelen = strlen(name); char *value; down_read(&JFS_IP(inode)->xattr_sem); xattr_size = ea_get(inode, &ea_buf, 0); if (xattr_size < 0) { size = xattr_size; goto out; } if (xattr_size == 0) goto not_found; ealist = (struct jfs_ea_list *) ea_buf.xattr; ealist_end = END_EALIST(ealist); /* Find the named attribute */ for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) { if (unlikely(ea + 1 > ealist_end) || unlikely(NEXT_EA(ea) > ealist_end)) { size = -EUCLEAN; goto release; } if ((namelen == ea->namelen) && memcmp(name, ea->name, namelen) == 0) { /* Found it */ size = le16_to_cpu(ea->valuelen); if (!data) goto release; else if (size > buf_size) { size = -ERANGE; goto release; } value = ((char *) &ea->name) + ea->namelen + 1; memcpy(data, value, size); goto release; } } not_found: size = -ENODATA; release: ea_release(inode, &ea_buf); out: up_read(&JFS_IP(inode)->xattr_sem); return size; } /* * No special permissions are needed to list attributes except for trusted.* */ static inline int can_list(struct jfs_ea *ea) { return (strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || capable(CAP_SYS_ADMIN)); } ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) { struct inode *inode = d_inode(dentry); char *buffer; ssize_t size = 0; int xattr_size; struct jfs_ea_list *ealist; struct jfs_ea *ea, *ealist_end; struct ea_buffer ea_buf; down_read(&JFS_IP(inode)->xattr_sem); xattr_size = ea_get(inode, &ea_buf, 0); if (xattr_size < 0) { size = xattr_size; goto out; } if (xattr_size == 0) goto release; ealist = (struct jfs_ea_list *) ea_buf.xattr; ealist_end = END_EALIST(ealist); /* compute required size of list */ for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) { if (unlikely(ea + 1 > ealist_end) || unlikely(NEXT_EA(ea) > ealist_end)) { size = -EUCLEAN; goto release; } if (can_list(ea)) size += name_size(ea) + 1; } if (!data) goto release; if (size > buf_size) { size = -ERANGE; goto release; } /* Copy attribute names to buffer */ buffer = data; for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { if (can_list(ea)) { int namelen = copy_name(buffer, ea); buffer += namelen + 1; } } release: ea_release(inode, &ea_buf); out: up_read(&JFS_IP(inode)->xattr_sem); return size; } static int __jfs_xattr_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct jfs_inode_info *ji = JFS_IP(inode); tid_t tid; int rc; tid = txBegin(inode->i_sb, 0); mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, inode, name, value, size, flags); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); mutex_unlock(&ji->commit_mutex); return rc; } static int jfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *value, size_t size) { name = xattr_full_name(handler, name); return __jfs_getxattr(inode, name, value, size); } static int jfs_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { name = xattr_full_name(handler, name); return __jfs_xattr_set(inode, name, value, size, flags); } static int jfs_xattr_get_os2(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *value, size_t size) { if (is_known_namespace(name)) return -EOPNOTSUPP; return __jfs_getxattr(inode, name, value, size); } static int jfs_xattr_set_os2(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (is_known_namespace(name)) return -EOPNOTSUPP; return __jfs_xattr_set(inode, name, value, size, flags); } static const struct xattr_handler jfs_user_xattr_handler = { .prefix = XATTR_USER_PREFIX, .get = jfs_xattr_get, .set = jfs_xattr_set, }; static const struct xattr_handler jfs_os2_xattr_handler = { .prefix = XATTR_OS2_PREFIX, .get = jfs_xattr_get_os2, .set = jfs_xattr_set_os2, }; static const struct xattr_handler jfs_security_xattr_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = jfs_xattr_get, .set = jfs_xattr_set, }; static const struct xattr_handler jfs_trusted_xattr_handler = { .prefix = XATTR_TRUSTED_PREFIX, .get = jfs_xattr_get, .set = jfs_xattr_set, }; const struct xattr_handler * const jfs_xattr_handlers[] = { &jfs_os2_xattr_handler, &jfs_user_xattr_handler, &jfs_security_xattr_handler, &jfs_trusted_xattr_handler, NULL, }; #ifdef CONFIG_JFS_SECURITY static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { const struct xattr *xattr; tid_t *tid = fs_info; char *name; int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(xattr->name) + 1, GFP_NOFS); if (!name) { err = -ENOMEM; break; } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); err = __jfs_setxattr(*tid, inode, name, xattr->value, xattr->value_len, 0); kfree(name); if (err < 0) break; } return err; } int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, const struct qstr *qstr) { return security_inode_init_security(inode, dir, qstr, &jfs_initxattrs, &tid); } #endif
36 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_SUBVOLUME_FORMAT_H #define _BCACHEFS_SUBVOLUME_FORMAT_H #define SUBVOL_POS_MIN POS(0, 1) #define SUBVOL_POS_MAX POS(0, S32_MAX) #define BCACHEFS_ROOT_SUBVOL 1 struct bch_subvolume { struct bch_val v; __le32 flags; __le32 snapshot; __le64 inode; /* * Snapshot subvolumes form a tree, separate from the snapshot nodes * tree - if this subvolume is a snapshot, this is the ID of the * subvolume it was created from: * * This is _not_ necessarily the subvolume of the directory containing * this subvolume: */ __le32 creation_parent; __le32 fs_path_parent; bch_le128 otime; }; LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) /* * We need to know whether a subvolume is a snapshot so we can know whether we * can delete it (or whether it should just be rm -rf'd) */ LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) #endif /* _BCACHEFS_SUBVOLUME_FORMAT_H */
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 /* SPDX-License-Identifier: GPL-2.0 */ /* * XDR standard data types and function declarations * * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> * * Based on: * RFC 4506 "XDR: External Data Representation Standard", May 2006 */ #ifndef _SUNRPC_XDR_H_ #define _SUNRPC_XDR_H_ #include <linux/uio.h> #include <asm/byteorder.h> #include <linux/unaligned.h> #include <linux/scatterlist.h> struct bio_vec; struct rpc_rqst; /* * Size of an XDR encoding unit in bytes, i.e. 32 bits, * as defined in Section 3 of RFC 4506. All encoded * XDR data items are aligned on a boundary of 32 bits. */ #define XDR_UNIT sizeof(__be32) /* * Buffer adjustment */ #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { unsigned int len; u8 * data; }; /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers * and the data payload for short messages), and then an array of * pages. * The tail iovec allows you to append data after the page array. Its * main interest is for appending padding to the pages in order to * satisfy the int_32-alignment requirements in RFC1832. * * For the future, we might want to string several of these together * in a list if anybody wants to make use of NFSv4 COMPOUND * operations and/or has a need for scatter/gather involving pages. */ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ struct bio_vec *bvec; struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ #define XDRBUF_READ 0x01 /* target of file read */ #define XDRBUF_WRITE 0x02 /* source of file write */ #define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ }; static inline void xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) { buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->pages = NULL; buf->page_len = 0; buf->flags = 0; buf->len = 0; buf->buflen = len; } /* * pre-xdr'ed macros. */ #define xdr_zero cpu_to_be32(0) #define xdr_one cpu_to_be32(1) #define xdr_two cpu_to_be32(2) #define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL) #define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) #define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT) #define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) #define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS) #define rpc_call cpu_to_be32(RPC_CALL) #define rpc_reply cpu_to_be32(RPC_REPLY) #define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED) #define rpc_success cpu_to_be32(RPC_SUCCESS) #define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) #define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) #define rpc_proc_unavail cpu_to_be32(RPC_PROC_UNAVAIL) #define rpc_garbage_args cpu_to_be32(RPC_GARBAGE_ARGS) #define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) #define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) #define rpc_mismatch cpu_to_be32(RPC_MISMATCH) #define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR) #define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) #define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) #define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) #define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) #define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) /* * Miscellaneous XDR helper functions */ __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(const struct xdr_buf *, const u32); size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, const struct xdr_buf *xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { return xdr_encode_opaque(p, s, len); } /* * Decode 64bit quantities (NFSv3 support) */ static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { put_unaligned_be64(val, p); return p + 2; } static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { *valp = get_unaligned_be64(p); return p + 2; } static inline __be32 * xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len) { memcpy(ptr, p, len); return p + XDR_QUADLEN(len); } static inline void xdr_netobj_dup(struct xdr_netobj *dst, struct xdr_netobj *src, gfp_t gfp_mask) { dst->data = kmemdup(src->data, src->len, gfp_mask); dst->len = src->len; } /* * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) */ static inline int xdr_adjust_iovec(struct kvec *iov, __be32 *p) { return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); } /* * XDR buffer helper functions */ extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32); extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); struct xdr_array2_desc { unsigned int elem_size; unsigned int array_len; unsigned int array_maxlen; xdr_xcode_elem_t xcode; }; extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len); /* * Provide some simple tools for XDR buffer overflow-checking etc. */ struct xdr_stream { __be32 *p; /* start of available buffer */ struct xdr_buf *buf; /* XDR buffer to read/write */ __be32 *end; /* end of available buffer space */ struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ }; /* * These are the xdr_stream style generic XDR encode and decode functions. */ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, const void *obj); typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int len); extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, unsigned int target, unsigned int length); extern unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset, unsigned int length); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. * @xdr: pointer to xdr_stream struct * @buf: pointer to an empty buffer * @buflen: size of 'buf' * * The scratch buffer is used when decoding from an array of pages. * If an xdr_inline_decode() call spans across page boundaries, then * we copy the data into the scratch buffer in order to allow linear * access. */ static inline void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) { xdr->scratch.iov_base = buf; xdr->scratch.iov_len = buflen; } /** * xdr_set_scratch_page - Attach a scratch buffer for decoding data * @xdr: pointer to xdr_stream struct * @page: an anonymous page * * See xdr_set_scratch_buffer(). */ static inline void xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) { xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); } /** * xdr_reset_scratch_buffer - Clear scratch buffer information * @xdr: pointer to xdr_stream struct * * See xdr_set_scratch_buffer(). */ static inline void xdr_reset_scratch_buffer(struct xdr_stream *xdr) { xdr_set_scratch_buffer(xdr, NULL, 0); } /** * xdr_commit_encode - Ensure all data is written to xdr->buf * @xdr: pointer to xdr_stream * * Handle encoding across page boundaries by giving the caller a * temporary location to write to, then later copying the data into * place. __xdr_commit_encode() does that copying. */ static inline void xdr_commit_encode(struct xdr_stream *xdr) { if (unlikely(xdr->scratch.iov_len)) __xdr_commit_encode(xdr); } /** * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream * * Return value: * Number of bytes remaining in @xdr before xdr->end */ static inline size_t xdr_stream_remaining(const struct xdr_stream *xdr) { return xdr->nwords << 2; } ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size); ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, void **body, unsigned int *body_len); ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, void *body, unsigned int body_len); /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) * * Return value: * Size (in bytes) of the object including xdr padding */ static inline size_t xdr_align_size(size_t n) { const size_t mask = XDR_UNIT - 1; return (n + mask) & ~mask; } /** * xdr_pad_size - Calculate size of an object's pad * @n: Size of an object being XDR encoded (in bytes) * * This implementation avoids the need for conditional * branches or modulo division. * * Return value: * Size (in bytes) of the needed XDR pad */ static inline size_t xdr_pad_size(size_t n) { return xdr_align_size(n) - n; } /** * xdr_stream_encode_item_present - Encode a "present" list item * @xdr: pointer to xdr_stream * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) { const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; *p = xdr_one; return len; } /** * xdr_stream_encode_item_absent - Encode a "not present" list item * @xdr: pointer to xdr_stream * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) { const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; *p = xdr_zero; return len; } /** * xdr_encode_bool - Encode a boolean item * @p: address in a buffer into which to encode * @n: boolean value to encode * * Return value: * Address of item following the encoded boolean */ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) { *p++ = n ? xdr_one : xdr_zero; return p; } /** * xdr_stream_encode_bool - Encode a boolean item * @xdr: pointer to xdr_stream * @n: boolean value to encode * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) { const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; xdr_encode_bool(p, n); return len; } /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream * @n: integer to encode * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) { const size_t len = sizeof(n); __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; *p = cpu_to_be32(n); return len; } /** * xdr_stream_encode_be32 - Encode a big-endian 32-bit integer * @xdr: pointer to xdr_stream * @n: integer to encode * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) { const size_t len = sizeof(n); __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; *p = n; return len; } /** * xdr_stream_encode_u64 - Encode a 64-bit integer * @xdr: pointer to xdr_stream * @n: 64-bit integer to encode * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) { const size_t len = sizeof(n); __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; xdr_encode_hyper(p, n); return len; } /** * xdr_stream_encode_opaque_inline - Encode opaque xdr data * @xdr: pointer to xdr_stream * @ptr: pointer to void pointer * @len: size of object * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) { size_t count = sizeof(__u32) + xdr_align_size(len); __be32 *p = xdr_reserve_space(xdr, count); if (unlikely(!p)) { *ptr = NULL; return -EMSGSIZE; } xdr_encode_opaque(p, NULL, len); *ptr = ++p; return count; } /** * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: pointer to opaque data object * @len: size of object pointed to by @ptr * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t len) { __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) return -EMSGSIZE; xdr_encode_opaque_fixed(p, ptr, len); return xdr_align_size(len); } /** * xdr_stream_encode_opaque - Encode variable length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: pointer to opaque data object * @len: size of object pointed to by @ptr * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) { size_t count = sizeof(__u32) + xdr_align_size(len); __be32 *p = xdr_reserve_space(xdr, count); if (unlikely(!p)) return -EMSGSIZE; xdr_encode_opaque(p, ptr, len); return count; } /** * xdr_stream_encode_uint32_array - Encode variable length array of integers * @xdr: pointer to xdr_stream * @array: array of integers * @array_size: number of elements in @array * * Return values: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ static inline ssize_t xdr_stream_encode_uint32_array(struct xdr_stream *xdr, const __u32 *array, size_t array_size) { ssize_t ret = (array_size+1) * sizeof(__u32); __be32 *p = xdr_reserve_space(xdr, ret); if (unlikely(!p)) return -EMSGSIZE; *p++ = cpu_to_be32(array_size); for (; array_size > 0; p++, array++, array_size--) *p = cpu_to_be32p(array); return ret; } /** * xdr_item_is_absent - symbolically handle XDR discriminators * @p: pointer to undecoded discriminator * * Return values: * %true if the following XDR item is absent * %false if the following XDR item is present */ static inline bool xdr_item_is_absent(const __be32 *p) { return *p == xdr_zero; } /** * xdr_item_is_present - symbolically handle XDR discriminators * @p: pointer to undecoded discriminator * * Return values: * %true if the following XDR item is present * %false if the following XDR item is absent */ static inline bool xdr_item_is_present(const __be32 *p) { return *p != xdr_zero; } /** * xdr_stream_decode_bool - Decode a boolean * @xdr: pointer to xdr_stream * @ptr: pointer to a u32 in which to store the result * * Return values: * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) { const size_t count = sizeof(*ptr); __be32 *p = xdr_inline_decode(xdr, count); if (unlikely(!p)) return -EBADMSG; *ptr = (*p != xdr_zero); return 0; } /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store integer * * Return values: * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) { const size_t count = sizeof(*ptr); __be32 *p = xdr_inline_decode(xdr, count); if (unlikely(!p)) return -EBADMSG; *ptr = be32_to_cpup(p); return 0; } /** * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store integer * * Return values: * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) { const size_t count = sizeof(*ptr); __be32 *p = xdr_inline_decode(xdr, count); if (unlikely(!p)) return -EBADMSG; *ptr = *p; return 0; } /** * xdr_stream_decode_u64 - Decode a 64-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store 64-bit integer * * Return values: * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) { const size_t count = sizeof(*ptr); __be32 *p = xdr_inline_decode(xdr, count); if (unlikely(!p)) return -EBADMSG; xdr_decode_hyper(p, ptr); return 0; } /** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: location to store data * @len: size of buffer pointed to by @ptr * * Return values: * On success, returns size of object stored in @ptr * %-EBADMSG on XDR buffer overflow */ static inline ssize_t xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) { __be32 *p = xdr_inline_decode(xdr, len); if (unlikely(!p)) return -EBADMSG; xdr_decode_opaque_fixed(p, ptr, len); return len; } /** * xdr_stream_decode_opaque_inline - Decode variable length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: location to store pointer to opaque data * @maxlen: maximum acceptable object size * * Note: the pointer stored in @ptr cannot be assumed valid after the XDR * buffer has been destroyed, or even after calling xdr_inline_decode() * on @xdr. It is therefore expected that the object it points to should * be processed immediately. * * Return values: * On success, returns size of object stored in *@ptr * %-EBADMSG on XDR buffer overflow * %-EMSGSIZE if the size of the object would exceed @maxlen */ static inline ssize_t xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxlen) { __be32 *p; __u32 len; *ptr = NULL; if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; if (len != 0) { p = xdr_inline_decode(xdr, len); if (unlikely(!p)) return -EBADMSG; if (unlikely(len > maxlen)) return -EMSGSIZE; *ptr = p; } return len; } /** * xdr_stream_decode_uint32_array - Decode variable length array of integers * @xdr: pointer to xdr_stream * @array: location to store the integer array or NULL * @array_size: number of elements to store * * Return values: * On success, returns number of elements stored in @array * %-EBADMSG on XDR buffer overflow * %-EMSGSIZE if the size of the array exceeds @array_size */ static inline ssize_t xdr_stream_decode_uint32_array(struct xdr_stream *xdr, __u32 *array, size_t array_size) { __be32 *p; __u32 len; ssize_t retval; if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; if (array == NULL) return len; if (len <= array_size) { if (len < array_size) memset(array+len, 0, (array_size-len)*sizeof(*array)); array_size = len; retval = len; } else retval = -EMSGSIZE; for (; array_size > 0; p++, array++, array_size--) *array = be32_to_cpup(p); return retval; } #endif /* _SUNRPC_XDR_H_ */
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 // SPDX-License-Identifier: GPL-2.0+ /* * Special Initializers for certain USB Mass Storage devices * * Current development and maintenance by: * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * * This driver is based on the 'USB Mass Storage Class' document. This * describes in detail the protocol used to communicate with such * devices. Clearly, the designers had SCSI and ATAPI commands in * mind when they created this document. The commands are all very * similar to commands in the SCSI-II and ATAPI specifications. * * It is important to note that in a number of cases this class * exhibits class-specific exemptions from the USB specification. * Notably the usage of NAK, STALL and ACK differs from the norm, in * that they are used to communicate wait, failed and OK on commands. * * Also, for certain devices, the interrupt endpoint is used to convey * status of a command. */ #include <linux/errno.h> #include "usb.h" #include "initializers.h" #include "debug.h" #include "transport.h" /* * This places the Shuttle/SCM USB<->SCSI bridge devices in multi-target * mode */ int usb_stor_euscsi_init(struct us_data *us) { int result; usb_stor_dbg(us, "Attempting to init eUSCSI bridge...\n"); result = usb_stor_control_msg(us, us->send_ctrl_pipe, 0x0C, USB_RECIP_INTERFACE | USB_TYPE_VENDOR, 0x01, 0x0, NULL, 0x0, 5 * HZ); usb_stor_dbg(us, "-- result is %d\n", result); return 0; } /* * This function is required to activate all four slots on the UCR-61S2B * flash reader */ int usb_stor_ucr61s2b_init(struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap*) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap*) us->iobuf; int res; unsigned int partial; static char init_string[] = "\xec\x0a\x06\x00$PCCHIPS"; usb_stor_dbg(us, "Sending UCR-61S2B initialization packet...\n"); bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->Tag = 0; bcb->DataTransferLength = cpu_to_le32(0); bcb->Flags = bcb->Lun = 0; bcb->Length = sizeof(init_string) - 1; memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, init_string, sizeof(init_string) - 1); res = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, US_BULK_CB_WRAP_LEN, &partial); if (res) return -EIO; usb_stor_dbg(us, "Getting status packet...\n"); res = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &partial); if (res) return -EIO; return 0; } /* This places the HUAWEI E220 devices in multi-port mode */ int usb_stor_huawei_e220_init(struct us_data *us) { int result; result = usb_stor_control_msg(us, us->send_ctrl_pipe, USB_REQ_SET_FEATURE, USB_TYPE_STANDARD | USB_RECIP_DEVICE, 0x01, 0x0, NULL, 0x0, 1 * HZ); usb_stor_dbg(us, "Huawei mode set result is %d\n", result); return 0; }
3 2 1 4 3 4 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 // SPDX-License-Identifier: MIT /* * Copyright 2020 Noralf Trønnes */ #include <linux/dma-buf.h> #include <linux/dma-mapping.h> #include <linux/lz4.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/string_helpers.h> #include <linux/usb.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> #include <drm/drm_damage_helper.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" /* Only used internally */ static const struct drm_format_info gud_drm_format_r1 = { .format = GUD_DRM_FORMAT_R1, .num_planes = 1, .char_per_block = { 1, 0, 0 }, .block_w = { 8, 0, 0 }, .block_h = { 1, 0, 0 }, .hsub = 1, .vsub = 1, }; static const struct drm_format_info gud_drm_format_xrgb1111 = { .format = GUD_DRM_FORMAT_XRGB1111, .num_planes = 1, .char_per_block = { 1, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 1, 0, 0 }, .hsub = 1, .vsub = 1, }; static int gud_usb_control_msg(struct usb_interface *intf, bool in, u8 request, u16 value, void *buf, size_t len) { u8 requesttype = USB_TYPE_VENDOR | USB_RECIP_INTERFACE; u8 ifnum = intf->cur_altsetting->desc.bInterfaceNumber; struct usb_device *usb = interface_to_usbdev(intf); unsigned int pipe; if (len && !buf) return -EINVAL; if (in) { pipe = usb_rcvctrlpipe(usb, 0); requesttype |= USB_DIR_IN; } else { pipe = usb_sndctrlpipe(usb, 0); requesttype |= USB_DIR_OUT; } return usb_control_msg(usb, pipe, request, requesttype, value, ifnum, buf, len, USB_CTRL_GET_TIMEOUT); } static int gud_get_display_descriptor(struct usb_interface *intf, struct gud_display_descriptor_req *desc) { void *buf; int ret; buf = kmalloc(sizeof(*desc), GFP_KERNEL); if (!buf) return -ENOMEM; ret = gud_usb_control_msg(intf, true, GUD_REQ_GET_DESCRIPTOR, 0, buf, sizeof(*desc)); memcpy(desc, buf, sizeof(*desc)); kfree(buf); if (ret < 0) return ret; if (ret != sizeof(*desc)) return -EIO; if (desc->magic != le32_to_cpu(GUD_DISPLAY_MAGIC)) return -ENODATA; DRM_DEV_DEBUG_DRIVER(&intf->dev, "version=%u flags=0x%x compression=0x%x max_buffer_size=%u\n", desc->version, le32_to_cpu(desc->flags), desc->compression, le32_to_cpu(desc->max_buffer_size)); if (!desc->version || !desc->max_width || !desc->max_height || le32_to_cpu(desc->min_width) > le32_to_cpu(desc->max_width) || le32_to_cpu(desc->min_height) > le32_to_cpu(desc->max_height)) return -EINVAL; return 0; } static int gud_status_to_errno(u8 status) { switch (status) { case GUD_STATUS_OK: return 0; case GUD_STATUS_BUSY: return -EBUSY; case GUD_STATUS_REQUEST_NOT_SUPPORTED: return -EOPNOTSUPP; case GUD_STATUS_PROTOCOL_ERROR: return -EPROTO; case GUD_STATUS_INVALID_PARAMETER: return -EINVAL; case GUD_STATUS_ERROR: return -EREMOTEIO; default: return -EREMOTEIO; } } static int gud_usb_get_status(struct usb_interface *intf) { int ret, status = -EIO; u8 *buf; buf = kmalloc(sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; ret = gud_usb_control_msg(intf, true, GUD_REQ_GET_STATUS, 0, buf, sizeof(*buf)); if (ret == sizeof(*buf)) status = gud_status_to_errno(*buf); kfree(buf); if (ret < 0) return ret; return status; } static int gud_usb_transfer(struct gud_device *gdrm, bool in, u8 request, u16 index, void *buf, size_t len) { struct usb_interface *intf = to_usb_interface(gdrm->drm.dev); int idx, ret; drm_dbg(&gdrm->drm, "%s: request=0x%x index=%u len=%zu\n", in ? "get" : "set", request, index, len); if (!drm_dev_enter(&gdrm->drm, &idx)) return -ENODEV; mutex_lock(&gdrm->ctrl_lock); ret = gud_usb_control_msg(intf, in, request, index, buf, len); if (ret == -EPIPE || ((gdrm->flags & GUD_DISPLAY_FLAG_STATUS_ON_SET) && !in && ret >= 0)) { int status; status = gud_usb_get_status(intf); if (status < 0) { ret = status; } else if (ret < 0) { dev_err_once(gdrm->drm.dev, "Unexpected status OK for failed transfer\n"); ret = -EPIPE; } } if (ret < 0) { drm_dbg(&gdrm->drm, "ret=%d\n", ret); gdrm->stats_num_errors++; } mutex_unlock(&gdrm->ctrl_lock); drm_dev_exit(idx); return ret; } /* * @buf cannot be allocated on the stack. * Returns number of bytes received or negative error code on failure. */ int gud_usb_get(struct gud_device *gdrm, u8 request, u16 index, void *buf, size_t max_len) { return gud_usb_transfer(gdrm, true, request, index, buf, max_len); } /* * @buf can be allocated on the stack or NULL. * Returns zero on success or negative error code on failure. */ int gud_usb_set(struct gud_device *gdrm, u8 request, u16 index, void *buf, size_t len) { void *trbuf = NULL; int ret; if (buf && len) { trbuf = kmemdup(buf, len, GFP_KERNEL); if (!trbuf) return -ENOMEM; } ret = gud_usb_transfer(gdrm, false, request, index, trbuf, len); kfree(trbuf); if (ret < 0) return ret; return ret != len ? -EIO : 0; } /* * @val can be allocated on the stack. * Returns zero on success or negative error code on failure. */ int gud_usb_get_u8(struct gud_device *gdrm, u8 request, u16 index, u8 *val) { u8 *buf; int ret; buf = kmalloc(sizeof(*val), GFP_KERNEL); if (!buf) return -ENOMEM; ret = gud_usb_get(gdrm, request, index, buf, sizeof(*val)); *val = *buf; kfree(buf); if (ret < 0) return ret; return ret != sizeof(*val) ? -EIO : 0; } /* Returns zero on success or negative error code on failure. */ int gud_usb_set_u8(struct gud_device *gdrm, u8 request, u8 val) { return gud_usb_set(gdrm, request, 0, &val, sizeof(val)); } static int gud_get_properties(struct gud_device *gdrm) { struct gud_property_req *properties; unsigned int i, num_properties; int ret; properties = kcalloc(GUD_PROPERTIES_MAX_NUM, sizeof(*properties), GFP_KERNEL); if (!properties) return -ENOMEM; ret = gud_usb_get(gdrm, GUD_REQ_GET_PROPERTIES, 0, properties, GUD_PROPERTIES_MAX_NUM * sizeof(*properties)); if (ret <= 0) goto out; if (ret % sizeof(*properties)) { ret = -EIO; goto out; } num_properties = ret / sizeof(*properties); ret = 0; gdrm->properties = drmm_kcalloc(&gdrm->drm, num_properties, sizeof(*gdrm->properties), GFP_KERNEL); if (!gdrm->properties) { ret = -ENOMEM; goto out; } for (i = 0; i < num_properties; i++) { u16 prop = le16_to_cpu(properties[i].prop); u64 val = le64_to_cpu(properties[i].val); switch (prop) { case GUD_PROPERTY_ROTATION: /* * DRM UAPI matches the protocol so use the value directly, * but mask out any additions on future devices. */ val &= GUD_ROTATION_MASK; ret = drm_plane_create_rotation_property(&gdrm->pipe.plane, DRM_MODE_ROTATE_0, val); break; default: /* New ones might show up in future devices, skip those we don't know. */ drm_dbg(&gdrm->drm, "Ignoring unknown property: %u\n", prop); continue; } if (ret) goto out; gdrm->properties[gdrm->num_properties++] = prop; } out: kfree(properties); return ret; } /* * FIXME: Dma-buf sharing requires DMA support by the importing device. * This function is a workaround to make USB devices work as well. * See todo.rst for how to fix the issue in the dma-buf framework. */ static struct drm_gem_object *gud_gem_prime_import(struct drm_device *drm, struct dma_buf *dma_buf) { struct gud_device *gdrm = to_gud_device(drm); if (!gdrm->dmadev) return ERR_PTR(-ENODEV); return drm_gem_prime_import_dev(drm, dma_buf, gdrm->dmadev); } static int gud_stats_debugfs(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct gud_device *gdrm = to_gud_device(entry->dev); char buf[10]; string_get_size(gdrm->bulk_len, 1, STRING_UNITS_2, buf, sizeof(buf)); seq_printf(m, "Max buffer size: %s\n", buf); seq_printf(m, "Number of errors: %u\n", gdrm->stats_num_errors); seq_puts(m, "Compression: "); if (gdrm->compression & GUD_COMPRESSION_LZ4) seq_puts(m, " lz4"); if (!gdrm->compression) seq_puts(m, " none"); seq_puts(m, "\n"); if (gdrm->compression) { u64 remainder; u64 ratio = div64_u64_rem(gdrm->stats_length, gdrm->stats_actual_length, &remainder); u64 ratio_frac = div64_u64(remainder * 10, gdrm->stats_actual_length); seq_printf(m, "Compression ratio: %llu.%llu\n", ratio, ratio_frac); } return 0; } static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = { .check = gud_pipe_check, .update = gud_pipe_update, DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS }; static const struct drm_mode_config_funcs gud_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; static const u64 gud_pipe_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; DEFINE_DRM_GEM_FOPS(gud_fops); static const struct drm_driver gud_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .fops = &gud_fops, DRM_GEM_SHMEM_DRIVER_OPS, .gem_prime_import = gud_gem_prime_import, DRM_FBDEV_SHMEM_DRIVER_OPS, .name = "gud", .desc = "Generic USB Display", .major = 1, .minor = 0, }; static int gud_alloc_bulk_buffer(struct gud_device *gdrm) { unsigned int i, num_pages; struct page **pages; void *ptr; int ret; gdrm->bulk_buf = vmalloc_32(gdrm->bulk_len); if (!gdrm->bulk_buf) return -ENOMEM; num_pages = DIV_ROUND_UP(gdrm->bulk_len, PAGE_SIZE); pages = kmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) return -ENOMEM; for (i = 0, ptr = gdrm->bulk_buf; i < num_pages; i++, ptr += PAGE_SIZE) pages[i] = vmalloc_to_page(ptr); ret = sg_alloc_table_from_pages(&gdrm->bulk_sgt, pages, num_pages, 0, gdrm->bulk_len, GFP_KERNEL); kfree(pages); return ret; } static void gud_free_buffers_and_mutex(void *data) { struct gud_device *gdrm = data; vfree(gdrm->compress_buf); gdrm->compress_buf = NULL; sg_free_table(&gdrm->bulk_sgt); vfree(gdrm->bulk_buf); gdrm->bulk_buf = NULL; mutex_destroy(&gdrm->ctrl_lock); } static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) { const struct drm_format_info *xrgb8888_emulation_format = NULL; bool rgb565_supported = false, xrgb8888_supported = false; unsigned int num_formats_dev, num_formats = 0; struct usb_endpoint_descriptor *bulk_out; struct gud_display_descriptor_req desc; struct device *dev = &intf->dev; size_t max_buffer_size = 0; struct gud_device *gdrm; struct drm_device *drm; u8 *formats_dev; u32 *formats; int ret, i; ret = usb_find_bulk_out_endpoint(intf->cur_altsetting, &bulk_out); if (ret) return ret; ret = gud_get_display_descriptor(intf, &desc); if (ret) { DRM_DEV_DEBUG_DRIVER(dev, "Not a display interface: ret=%d\n", ret); return -ENODEV; } if (desc.version > 1) { dev_err(dev, "Protocol version %u is not supported\n", desc.version); return -ENODEV; } gdrm = devm_drm_dev_alloc(dev, &gud_drm_driver, struct gud_device, drm); if (IS_ERR(gdrm)) return PTR_ERR(gdrm); drm = &gdrm->drm; drm->mode_config.funcs = &gud_mode_config_funcs; ret = drmm_mode_config_init(drm); if (ret) return ret; gdrm->flags = le32_to_cpu(desc.flags); gdrm->compression = desc.compression & GUD_COMPRESSION_LZ4; if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE && gdrm->compression) return -EINVAL; mutex_init(&gdrm->ctrl_lock); mutex_init(&gdrm->damage_lock); INIT_WORK(&gdrm->work, gud_flush_work); gud_clear_damage(gdrm); ret = devm_add_action(dev, gud_free_buffers_and_mutex, gdrm); if (ret) return ret; drm->mode_config.min_width = le32_to_cpu(desc.min_width); drm->mode_config.max_width = le32_to_cpu(desc.max_width); drm->mode_config.min_height = le32_to_cpu(desc.min_height); drm->mode_config.max_height = le32_to_cpu(desc.max_height); formats_dev = devm_kmalloc(dev, GUD_FORMATS_MAX_NUM, GFP_KERNEL); /* Add room for emulated XRGB8888 */ formats = devm_kmalloc_array(dev, GUD_FORMATS_MAX_NUM + 1, sizeof(*formats), GFP_KERNEL); if (!formats_dev || !formats) return -ENOMEM; ret = gud_usb_get(gdrm, GUD_REQ_GET_FORMATS, 0, formats_dev, GUD_FORMATS_MAX_NUM); if (ret < 0) return ret; num_formats_dev = ret; for (i = 0; i < num_formats_dev; i++) { const struct drm_format_info *info; size_t fmt_buf_size; u32 format; format = gud_to_fourcc(formats_dev[i]); if (!format) { drm_dbg(drm, "Unsupported format: 0x%02x\n", formats_dev[i]); continue; } if (format == GUD_DRM_FORMAT_R1) info = &gud_drm_format_r1; else if (format == GUD_DRM_FORMAT_XRGB1111) info = &gud_drm_format_xrgb1111; else info = drm_format_info(format); switch (format) { case GUD_DRM_FORMAT_R1: fallthrough; case DRM_FORMAT_R8: fallthrough; case GUD_DRM_FORMAT_XRGB1111: fallthrough; case DRM_FORMAT_RGB332: fallthrough; case DRM_FORMAT_RGB888: if (!xrgb8888_emulation_format) xrgb8888_emulation_format = info; break; case DRM_FORMAT_RGB565: rgb565_supported = true; if (!xrgb8888_emulation_format) xrgb8888_emulation_format = info; break; case DRM_FORMAT_XRGB8888: xrgb8888_supported = true; break; } fmt_buf_size = drm_format_info_min_pitch(info, 0, drm->mode_config.max_width) * drm->mode_config.max_height; max_buffer_size = max(max_buffer_size, fmt_buf_size); if (format == GUD_DRM_FORMAT_R1 || format == GUD_DRM_FORMAT_XRGB1111) continue; /* Internal not for userspace */ formats[num_formats++] = format; } if (!num_formats && !xrgb8888_emulation_format) { dev_err(dev, "No supported pixel formats found\n"); return -EINVAL; } /* Prefer speed over color depth */ if (rgb565_supported) drm->mode_config.preferred_depth = 16; if (!xrgb8888_supported && xrgb8888_emulation_format) { gdrm->xrgb8888_emulation_format = xrgb8888_emulation_format; formats[num_formats++] = DRM_FORMAT_XRGB8888; } if (desc.max_buffer_size) max_buffer_size = le32_to_cpu(desc.max_buffer_size); /* Prevent a misbehaving device from allocating loads of RAM. 4096x4096@XRGB8888 = 64 MB */ if (max_buffer_size > SZ_64M) max_buffer_size = SZ_64M; gdrm->bulk_pipe = usb_sndbulkpipe(interface_to_usbdev(intf), usb_endpoint_num(bulk_out)); gdrm->bulk_len = max_buffer_size; ret = gud_alloc_bulk_buffer(gdrm); if (ret) return ret; if (gdrm->compression & GUD_COMPRESSION_LZ4) { gdrm->lz4_comp_mem = devm_kmalloc(dev, LZ4_MEM_COMPRESS, GFP_KERNEL); if (!gdrm->lz4_comp_mem) return -ENOMEM; gdrm->compress_buf = vmalloc(gdrm->bulk_len); if (!gdrm->compress_buf) return -ENOMEM; } ret = drm_simple_display_pipe_init(drm, &gdrm->pipe, &gud_pipe_funcs, formats, num_formats, gud_pipe_modifiers, NULL); if (ret) return ret; devm_kfree(dev, formats); devm_kfree(dev, formats_dev); ret = gud_get_properties(gdrm); if (ret) { dev_err(dev, "Failed to get properties (error=%d)\n", ret); return ret; } drm_plane_enable_fb_damage_clips(&gdrm->pipe.plane); ret = gud_get_connectors(gdrm); if (ret) { dev_err(dev, "Failed to get connectors (error=%d)\n", ret); return ret; } drm_mode_config_reset(drm); usb_set_intfdata(intf, gdrm); gdrm->dmadev = usb_intf_get_dma_device(intf); if (!gdrm->dmadev) dev_warn(dev, "buffer sharing not supported"); drm_debugfs_add_file(drm, "stats", gud_stats_debugfs, NULL); ret = drm_dev_register(drm, 0); if (ret) { put_device(gdrm->dmadev); return ret; } drm_kms_helper_poll_init(drm); drm_client_setup(drm, NULL); return 0; } static void gud_disconnect(struct usb_interface *interface) { struct gud_device *gdrm = usb_get_intfdata(interface); struct drm_device *drm = &gdrm->drm; drm_dbg(drm, "%s:\n", __func__); drm_kms_helper_poll_fini(drm); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); put_device(gdrm->dmadev); gdrm->dmadev = NULL; } static int gud_suspend(struct usb_interface *intf, pm_message_t message) { struct gud_device *gdrm = usb_get_intfdata(intf); return drm_mode_config_helper_suspend(&gdrm->drm); } static int gud_resume(struct usb_interface *intf) { struct gud_device *gdrm = usb_get_intfdata(intf); drm_mode_config_helper_resume(&gdrm->drm); return 0; } static const struct usb_device_id gud_id_table[] = { { USB_DEVICE_INTERFACE_CLASS(0x1d50, 0x614d, USB_CLASS_VENDOR_SPEC) }, { USB_DEVICE_INTERFACE_CLASS(0x16d0, 0x10a9, USB_CLASS_VENDOR_SPEC) }, { } }; MODULE_DEVICE_TABLE(usb, gud_id_table); static struct usb_driver gud_usb_driver = { .name = "gud", .probe = gud_probe, .disconnect = gud_disconnect, .id_table = gud_id_table, .suspend = gud_suspend, .resume = gud_resume, .reset_resume = gud_resume, }; module_usb_driver(gud_usb_driver); MODULE_AUTHOR("Noralf Trønnes"); MODULE_DESCRIPTION("GUD USB Display driver"); MODULE_LICENSE("Dual MIT/GPL");
1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 /* SPDX-License-Identifier: GPL-2.0-or-later * * Copyright (C) 2005 David Brownell */ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H #include <linux/acpi.h> #include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/gpio/consumer.h> #include <linux/kthread.h> #include <linux/mod_devicetable.h> #include <linux/overflow.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> #include <uapi/linux/spi/spi.h> /* Max no. of CS supported per spi device */ #define SPI_CS_CNT_MAX 16 struct dma_chan; struct software_node; struct ptp_system_timestamp; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; struct spi_controller_mem_caps; struct spi_message; /* * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, * and SPI infrastructure. */ extern const struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers * @syncp: seqcount to protect members in this struct for per-cpu update * on 32-bit systems * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled * @errors: number of errors during spi_transfer * @timedout: number of timeouts during spi_transfer * * @spi_sync: number of times spi_sync is used * @spi_sync_immediate: * number of times spi_sync is executed immediately * in calling context without queuing and scheduling * @spi_async: number of times spi_async is used * * @bytes: number of bytes transferred to/from device * @bytes_tx: number of bytes sent to device * @bytes_rx: number of bytes received from device * * @transfer_bytes_histo: * transfer bytes histogram * * @transfers_split_maxsize: * number of transfers that have been split because of * maxsize limit */ struct spi_statistics { struct u64_stats_sync syncp; u64_stats_t messages; u64_stats_t transfers; u64_stats_t errors; u64_stats_t timedout; u64_stats_t spi_sync; u64_stats_t spi_sync_immediate; u64_stats_t spi_async; u64_stats_t bytes; u64_stats_t bytes_rx; u64_stats_t bytes_tx; #define SPI_STATISTICS_HISTO_SIZE 17 u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; u64_stats_t transfers_split_maxsize; }; #define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ do { \ struct spi_statistics *__lstats; \ get_cpu(); \ __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_add(&__lstats->field, count); \ u64_stats_update_end(&__lstats->syncp); \ put_cpu(); \ } while (0) #define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ do { \ struct spi_statistics *__lstats; \ get_cpu(); \ __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_inc(&__lstats->field); \ u64_stats_update_end(&__lstats->syncp); \ put_cpu(); \ } while (0) /** * struct spi_delay - SPI delay information * @value: Value for the delay * @unit: Unit for the delay */ struct spi_delay { #define SPI_DELAY_UNIT_USECS 0 #define SPI_DELAY_UNIT_NSECS 1 #define SPI_DELAY_UNIT_SCK 2 u16 value; u8 unit; }; extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer); extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer); /** * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. * @chip_select: Array of physical chipselect, spi->chipselect[i] gives * the corresponding physical CS for logical CS i. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden * (by specifying SPI_CS_HIGH) as can the "MSB first" default for * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). * This may be changed by the device's driver, or left at the * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state * @controller_data: Board-specific definitions for controller, such as * FIFO initialization parameters; from board_info.controller_data * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines * (optional, NULL when not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted * @cs_hold: delay to be introduced by the controller before CS is deasserted * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. * @pcpu_statistics: statistics for the spi_device * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. * * In @dev, the platform_data is used to hold information about this * device that's meaningful to the device's protocol driver, but not * to its controller. One example might be an identifier for a chip * variant with slightly different functionality; another might be * information about how this particular board wires the chip's pins. */ struct spi_device { struct device dev; struct spi_controller *controller; u32 max_speed_hz; u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ #define SPI_NO_RX BIT(30) /* No receive wire */ /* * TPM specification defines flow control over SPI. Client device * can insert a wait state on MISO when address is transmitted by * controller on MOSI. Detecting the wait state in software is only * possible for full duplex controllers. For controllers that support * only half-duplex, the wait state detection needs to be implemented * in hardware. TPM devices would set this flag when hardware flow * control is expected from SPI controller. */ #define SPI_TPM_HW_FLOW BIT(29) /* TPM HW flow control */ /* * All bits defined above should be covered by SPI_MODE_KERNEL_MASK. * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart, * which is defined in 'include/uapi/linux/spi/spi.h'. * The bits defined here are from bit 31 downwards, while in * SPI_MODE_USER_MASK are from 0 upwards. * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to decrease the bit index below as well. */ #define SPI_MODE_KERNEL_MASK (~(BIT(29) - 1)) u32 mode; int irq; void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; /* Bit mask of the chipselect(s) that the driver need to use from * the chipselect array.When the controller is capable to handle * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. */ u32 cs_index_mask : SPI_CS_CNT_MAX; /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: * - memory packing (12 bit samples into low bits, others zeroed) * - priority * - chipselect delays * - ... */ }; /* Make sure that SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK don't overlap */ static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); static inline struct spi_device *to_spi_device(const struct device *dev) { return dev ? container_of(dev, struct spi_device, dev) : NULL; } /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) { return (spi && get_device(&spi->dev)) ? spi : NULL; } static inline void spi_dev_put(struct spi_device *spi) { if (spi) put_device(&spi->dev); } /* ctldata is for the bus_controller driver's runtime state */ static inline void *spi_get_ctldata(const struct spi_device *spi) { return spi->controller_state; } static inline void spi_set_ctldata(struct spi_device *spi, void *state) { spi->controller_state = state; } /* Device driver data */ static inline void spi_set_drvdata(struct spi_device *spi, void *data) { dev_set_drvdata(&spi->dev, data); } static inline void *spi_get_drvdata(const struct spi_device *spi) { return dev_get_drvdata(&spi->dev); } static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx) { return spi->chip_select[idx]; } static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) { spi->chip_select[idx] = chipselect; } static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx) { return spi->cs_gpiod[idx]; } static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) { spi->cs_gpiod[idx] = csgpiod; } static inline bool spi_is_csgpiod(struct spi_device *spi) { u8 idx; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { if (spi_get_csgpiod(spi, idx)) return true; } return false; } /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver * @probe: Binds this driver to the SPI device. Drivers can verify * that the device is actually present, and may need to configure * characteristics (such as bits_per_word) which weren't needed for * the initial configuration done during system setup. * @remove: Unbinds this driver from the SPI device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec * @driver: SPI device drivers should initialize the name and owner * field of this structure. * * This represents the kind of device driver that uses SPI messages to * interact with the hardware at the other end of a SPI link. It's called * a "protocol" driver because it works through messages rather than talking * directly to SPI hardware (which is what the underlying SPI controller * driver does to pass those messages). These protocols are defined in the * specification for the device(s) supported by the driver. * * As a rule, those device protocols represent the lowest level interface * supported by a driver, and it will support upper level interfaces too. * Examples of such upper levels include frameworks like MTD, networking, * MMC, RTC, filesystem character device nodes, and hardware monitoring. */ struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; #define to_spi_driver(__drv) \ ( __drv ? container_of_const(__drv, struct spi_driver, driver) : NULL ) extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); /** * spi_unregister_driver - reverse effect of spi_register_driver * @sdrv: the driver to unregister * Context: can sleep */ static inline void spi_unregister_driver(struct spi_driver *sdrv) { if (sdrv) driver_unregister(&sdrv->driver); } extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); /* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ __spi_register_driver(THIS_MODULE, driver) /** * module_spi_driver() - Helper macro for registering a SPI driver * @__spi_driver: spi_driver struct * * Helper macro for SPI drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_spi_driver(__spi_driver) \ module_driver(__spi_driver, spi_register_driver, \ spi_unregister_driver) /** * struct spi_controller - interface to SPI master or slave controller * @dev: device interface to this driver * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual * SPI slaves, and are numbered from zero to num_chipselects. * each slave has a chipselect signal, but it's common that not * every chipselect is connected to a slave. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @buswidth_override_bits: flags to override for this controller driver * @bits_per_word_mask: A mask indicating which values of bits_per_word are * supported by the driver. Bit n indicates that a bits_per_word n+1 is * supported. If set, the SPI core will reject any transfer with an * unsupported bits_per_word. If not set, this value is simply ignored, * and it's up to the individual driver to perform any validation. * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @io_mutex: mutex for physical bus access * @add_lock: mutex to avoid adding devices to the same chipselect * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. * @set_cs_timing: optional hook for SPI devices to request SPI master * controller for configuring specific CS setup time, hold time and inactive * delay interms of clock counts * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @dma_map_dev: device which can be used for DMA mapping * @cur_rx_dma_dev: device which is currently used for RX DMA mapping * @cur_tx_dma_dev: device which is currently used for TX DMA mapping * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message * @cur_msg_incomplete: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to check if the driver has * already called spi_finalize_current_message(). * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected * @last_cs_index_mask: bit mask the last chip selects that were used * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running * @rt: whether this queue is set to run as a realtime task * @auto_runtime_pm: the core should ensure a runtime PM reference is held * while the hardware is prepared, using the parent * device for the spidev * @max_dma_len: Maximum length of a DMA transfer for the device. * @prepare_transfer_hardware: a message will soon arrive from the queue * so the subsystem requests the driver to prepare the transfer hardware * by issuing this call * @transfer_one_message: the subsystem calls the driver to transfer a single * message while queuing transfers that arrive in the meantime. When the * driver is finished with this message, it must call * spi_finalize_current_message() so the subsystem can issue the next * message * @unprepare_transfer_hardware: there are currently no more messages on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. * @optimize_message: optimize the message for reuse * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem * can issue the next transfer. If the transfer fails, the * driver must set the flag SPI_TRANS_FAIL_IO to * spi_transfer->error first, before calling * spi_finalize_current_transfer(). * Note: transfer_one and transfer_one_message are mutually * exclusive; when both are set, the generic subsystem does * not call your transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices * @dummy_tx: dummy transmit buffer for full-duplex devices * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. * @ptp_sts_supported: If the driver sets this to true, it must provide a * time snapshot in @spi_transfer->ptp_sts as close as possible to the * moment in time when @spi_transfer->ptp_sts_word_pre and * @spi_transfer->ptp_sts_word_post were transmitted. * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core * @defer_optimize_message: set to true if controller cannot pre-optimize messages * and needs to defer the optimization step until the message is actually * being transferred * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals * but not chip select signals. Each device may be configured to use a * different clock rate, since those shared signals are ignored unless * the chip is selected. * * The driver for an SPI controller manages access to those devices through * a queue of spi_message transactions, copying data between CPU memory and * an SPI slave device. For each such message it queues, it calls the * message's completion function when the transaction completes. */ struct spi_controller { struct device dev; struct list_head list; /* * Other than negative (== assign one dynamically), bus_num is fully * board-specific. Usually that simplifies to being SoC-specific. * example: one SoC has three SPI controllers, numbered 0..2, * and one board's schematics might show it using SPI-2. Software * would normally use bus_num=2 for that controller. */ s16 bus_num; /* * Chipselects will be integral to many controllers; some others * might use board-specific GPIOs. */ u16 num_chipselect; /* Some SPI controllers pose alignment requirements on DMAable * buffers; let protocol drivers know about these requirements. */ u16 dma_alignment; /* spi_device.mode flags understood by this controller driver */ u32 mode_bits; /* spi_device.mode flags override flags for this controller */ u32 buswidth_override_bits; /* Bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) #define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1) /* Limits on transfer speed */ u32 min_speed_hz; u32 max_speed_hz; /* Other constraints relevant to this driver */ u16 flags; #define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* Can't do full duplex */ #define SPI_CONTROLLER_NO_RX BIT(1) /* Can't do buffer read */ #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ #define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* * The spi-controller has multi chip select capability and can * assert/de-assert more than one chip select at once. */ #define SPI_CONTROLLER_MULTI_CS BIT(7) /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; union { /* Flag indicating this is an SPI slave controller */ bool slave; /* Flag indicating this is an SPI target controller */ bool target; }; /* * On some hardware transfer / message size may be constrained * the limit may depend on device transfer settings. */ size_t (*max_transfer_size)(struct spi_device *spi); size_t (*max_message_size)(struct spi_device *spi); /* I/O mutex */ struct mutex io_mutex; /* Used to avoid adding the same CS twice */ struct mutex add_lock; /* Lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; /* * Setup mode and clock, etc (SPI driver may call many times). * * IMPORTANT: this may be called when transfers to another * device are active. DO NOT UPDATE SHARED REGISTERS in ways * which could break those transfers. */ int (*setup)(struct spi_device *spi); /* * set_cs_timing() method is for SPI controllers that supports * configuring CS timing. * * This hook allows SPI client drivers to request SPI controllers * to configure specific CS timing through spi_set_cs_timing() after * spi_setup(). */ int (*set_cs_timing)(struct spi_device *spi); /* * Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. * + For now there's no remove-from-queue operation, or * any other request management * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) * * + Chipselect stays active during the entire message * (unless modified by spi_transfer.cs_change != 0). * + The message transfers use clock and SPI mode parameters * previously established by setup() for this device */ int (*transfer)(struct spi_device *spi, struct spi_message *mesg); /* Called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* * Used to enable core support for DMA handling, if can_dma() * exists and returns true then the transfer will be mapped * prior to transfer_one() being called. The driver should * not modify or store xfer and dma_tx and dma_rx must be set * while the device is prepared. */ bool (*can_dma)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *xfer); struct device *dma_map_dev; struct device *cur_rx_dma_dev; struct device *cur_tx_dma_dev; /* * These hooks are for drivers that want to use the generic * controller transfer queueing mechanism. If these are used, the * transfer() function above must NOT be specified by the driver. * Over time we expect SPI drivers to be phased over to this API. */ bool queued; struct kthread_worker *kworker; struct kthread_work pump_messages; spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; struct completion cur_msg_completion; bool cur_msg_incomplete; bool cur_msg_need_completion; bool busy; bool running; bool rt; bool auto_runtime_pm; bool fallback; bool last_cs_mode_high; s8 last_cs[SPI_CS_CNT_MAX]; u32 last_cs_index_mask : SPI_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; int (*optimize_message)(struct spi_message *msg); int (*unoptimize_message)(struct spi_message *msg); int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); int (*unprepare_transfer_hardware)(struct spi_controller *ctlr); int (*prepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*target_abort)(struct spi_controller *ctlr); /* * These hooks are for drivers that use a generic implementation * of transfer_one_message() provided by the core. */ void (*set_cs)(struct spi_device *spi, bool enable); int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *transfer); void (*handle_err)(struct spi_controller *ctlr, struct spi_message *message); /* Optimized handlers for SPI memory-like operations. */ const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; s8 max_native_cs; /* Statistics */ struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; /* Dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); /* * Driver sets this field to indicate it is able to snapshot SPI * transfers (needed e.g. for reading the time of POSIX clocks) */ bool ptp_sts_supported; /* Interrupt enable state during PTP system timestamping */ unsigned long irq_flags; /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; bool defer_optimize_message; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) { return dev_get_drvdata(&ctlr->dev); } static inline void spi_controller_set_devdata(struct spi_controller *ctlr, void *data) { dev_set_drvdata(&ctlr->dev, data); } static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr) { if (!ctlr || !get_device(&ctlr->dev)) return NULL; return ctlr; } static inline void spi_controller_put(struct spi_controller *ctlr) { if (ctlr) put_device(&ctlr->dev); } static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; } /* PM calls that need to be issued by the driver */ extern int spi_controller_suspend(struct spi_controller *ctlr); extern int spi_controller_resume(struct spi_controller *ctlr); /* Calls the driver make to interact with the message queue */ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr); extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); /* Helper calls for driver to timestamp transfer */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { return __spi_alloc_controller(dev, size, false); } static inline struct spi_controller *spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(dev, size, true); } struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); #else static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) { return NULL; } static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index) { return ERR_PTR(-ENODEV); } static inline int acpi_spi_count_resources(struct acpi_device *adev) { return 0; } #endif /* * SPI resource management while processing a SPI message */ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_message *msg, void *res); /** * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * * This is based on ideas from devres, but focused on life-cycle * management during spi_message processing. */ struct spi_res { struct list_head entry; spi_res_release_t release; unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ /* * I/O INTERFACE between SPI controller and protocol drivers * * Protocol drivers use a queue of spi_messages, each transferring data * between the controller and memory buffers. * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * * NOTE: Allocation of spi_transfer and spi_message memory is entirely * up to the protocol driver, which guarantees the integrity of both (as * well as the data buffers) for as long as the message is queued. */ /** * struct spi_transfer - a read/write buffer pair * @tx_buf: data to be written (DMA-safe memory), or NULL * @rx_buf: data to be read (DMA-safe memory), or NULL * @tx_dma: DMA address of tx_buf, currently not for client use * @rx_dma: DMA address of rx_buf, currently not for client use * @tx_nbits: number of bits used for writing. If 0 the default * (SPI_NBITS_SINGLE) is used. * @rx_nbits: number of bits used for reading. If 0 the default * (SPI_NBITS_SINGLE) is used. * @len: size of rx and tx buffers (in bytes) * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @dummy_data: indicates transfer is dummy bytes transfer. * @cs_off: performs the transfer with chipselect off. * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message * @delay: delay to be introduced after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg_mapped: If true, the @tx_sg is mapped for DMA * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset * within @tx_buf for which the SPI device is requesting that the time * snapshot for this transfer begins. Upon completing the SPI transfer, * this value may have changed compared to what was requested, depending * on the available snapshotting resolution (DMA transfer, * @ptp_sts_supported is false, etc). * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning * that a single byte should be snapshotted). * If the core takes care of the timestamp (if @ptp_sts_supported is false * for this controller), it will set @ptp_sts_word_pre to 0, and * @ptp_sts_word_post to the length of the transfer. This is done * purposefully (instead of setting to spi_transfer->len - 1) to denote * that a transfer-level snapshot taken from within the driver may still * be of higher quality. * @ptp_sts: Pointer to a memory location held by the SPI slave device where a * PTP system timestamp structure may lie. If drivers use PIO or their * hardware has some sort of assist for retrieving exact transfer timing, * they can (and should) assert @ptp_sts_supported and populate this * structure using the ptp_read_system_*ts helper functions. * The timestamp must represent the time at which the SPI slave device has * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. * In some cases, they may also want to provide DMA addresses for * the data being transferred; that may reduce overhead, when the * underlying driver uses DMA. * * If the transmit buffer is NULL, zeroes will be shifted out * while filling @rx_buf. If the receive buffer is NULL, the data * shifted in will be discarded. Only "len" bytes shift out (or in). * It's an error to try to shift out a partial word. (For example, by * shifting out three bytes with word size of sixteen or twenty bits; * the former uses two bytes per word, the latter uses four bytes.) * * In-memory data values are always in native CPU byte order, translated * from the wire byte order (big-endian except with SPI_LSB_FIRST). So * for example when bits_per_word is sixteen, buffers are 2N bytes long * (@len = 2N) and hold N sixteen bit words in CPU byte order. * * When the word size of the SPI transfer is not a power-of-two multiple * of eight bits, those in-memory words include extra bits. In-memory * words are always seen by protocol drivers as right-justified, so the * undefined (rx) or unused (tx) bits are always the most significant bits. * * All SPI transfers start with the relevant chipselect active. Normally * it stays selected until after the last transfer in a message. Drivers * can affect the chipselect signal using cs_change. * * (i) If the transfer isn't the last one in the message, this flag is * used to make the chipselect briefly go inactive in the middle of the * message. Toggling chipselect in this way may be needed to terminate * a chip command, letting a single spi_message perform all of group of * chip transactions together. * * (ii) When the transfer is the last one in the message, the chip may * stay selected until the next transfer. On multi-device SPI busses * with nothing blocking messages going to other devices, this is just * a performance hint; starting a message to another device deselects * this one. But in other cases, this can be used to ensure correctness. * Some devices need protocol transactions to be built from a series of * spi_message submissions, where the content of one message is determined * by the results of previous messages and where the whole transaction * ends when the chipselect goes inactive. * * When SPI can transfer in 1x,2x or 4x. It can get this transfer information * from device through @tx_nbits and @rx_nbits. In Bi-direction, these * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_transfer { /* * It's okay if tx_buf == rx_buf (right?). * For MicroWire, one buffer must be NULL. * Buffers must work with dma_*map_single() calls. */ const void *tx_buf; void *rx_buf; unsigned len; #define SPI_TRANS_FAIL_NO_START BIT(0) #define SPI_TRANS_FAIL_IO BIT(1) u16 error; bool tx_sg_mapped; bool rx_sg_mapped; struct sg_table tx_sg; struct sg_table rx_sg; dma_addr_t tx_dma; dma_addr_t rx_dma; unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; unsigned tx_nbits:4; unsigned rx_nbits:4; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ #define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; struct spi_delay word_delay; u32 speed_hz; u32 effective_speed_hz; unsigned int ptp_sts_word_pre; unsigned int ptp_sts_word_post; struct ptp_system_timestamp *ptp_sts; struct list_head transfer_list; }; /** * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued * @pre_optimized: peripheral driver pre-optimized the message * @optimized: the message is in the optimized state * @prepared: spi_prepare_message was called for the this message * @status: zero for success, else negative errno * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" * in the sense that no other spi_message may use that SPI bus until that * sequence completes. On some systems, many such sequences can execute as * a single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_message { struct list_head transfers; struct spi_device *spi; /* spi_optimize_message() was called for this message */ bool pre_optimized; /* __spi_optimize_message() was called for this message */ bool optimized; /* spi_prepare_message() was called for this message */ bool prepared; /* * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. * * Some controller drivers (message-at-a-time queue processing) * could provide that as their default scheduling algorithm. But * others (with multi-message pipelines) could need a flag to * tell them about such special cases. */ /* Completion is reported through a callback */ int status; void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; /* * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; /* * Optional state for use by controller driver between calls to * __spi_optimize_message() and __spi_unoptimize_message(). */ void *opt_state; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; static inline void spi_message_init_no_memset(struct spi_message *m) { INIT_LIST_HEAD(&m->transfers); INIT_LIST_HEAD(&m->resources); } static inline void spi_message_init(struct spi_message *m) { memset(m, 0, sizeof *m); spi_message_init_no_memset(m); } static inline void spi_message_add_tail(struct spi_transfer *t, struct spi_message *m) { list_add_tail(&t->transfer_list, &m->transfers); } static inline void spi_transfer_del(struct spi_transfer *t) { list_del(&t->transfer_list); } static inline int spi_transfer_delay_exec(struct spi_transfer *t) { return spi_delay_exec(&t->delay, t); } /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in * the given array to the message. */ static inline void spi_message_init_with_transfers(struct spi_message *m, struct spi_transfer *xfers, unsigned int num_xfers) { unsigned int i; spi_message_init(m); for (i = 0; i < num_xfers; ++i) spi_message_add_tail(&xfers[i], m); } /* * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. */ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { struct spi_message_with_transfers { struct spi_message m; struct spi_transfer t[]; } *mwt; unsigned i; mwt = kzalloc(struct_size(mwt, t, ntrans), flags); if (!mwt) return NULL; spi_message_init_no_memset(&mwt->m); for (i = 0; i < ntrans; i++) spi_message_add_tail(&mwt->t[i], &mwt->m); return &mwt->m; } static inline void spi_message_free(struct spi_message *m) { kfree(m); } extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); extern void spi_unoptimize_message(struct spi_message *msg); extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, struct spi_message *msg); extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_target_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (!ctlr->max_message_size) return SIZE_MAX; return ctlr->max_message_size(spi); } static inline size_t spi_max_transfer_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; size_t tr_max = SIZE_MAX; size_t msg_max = spi_max_message_size(spi); if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } /** * spi_is_bpw_supported - Check if bits per word is supported * @spi: SPI device * @bpw: Bits per word * * This function checks to see if the SPI controller supports @bpw. * * Returns: * True if @bpw is supported, false otherwise. */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { u32 bpw_mask = spi->controller->bits_per_word_mask; if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; return false; } /** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI device * @xfer: Transfer descriptor * * Compute a relevant timeout value for the given transfer. We derive the time * that it would take on a single data line and take twice this amount of time * with a minimum of 500ms to avoid false positives on loaded systems. * * Returns: Transfer timeout value in milliseconds. */ static inline unsigned int spi_controller_xfer_timeout(struct spi_controller *ctlr, struct spi_transfer *xfer) { return max(xfer->len * 8 * 2 / (xfer->speed_hz / 1000), 500U); } /*---------------------------------------------------------------------------*/ /* SPI transfer replacement methods which make use of spi_res */ struct spi_replaced_transfers; typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, struct spi_message *msg, struct spi_replaced_transfers *res); /** * struct spi_replaced_transfers - structure describing the spi_transfer * replacements that have occurred * so that they can get reverted * @release: some extra release code to get executed prior to * releasing this structure * @extradata: pointer to some extra data if requested or NULL * @replaced_transfers: transfers that have been replaced and which need * to get restored * @replaced_after: the transfer after which the @replaced_transfers * are to get re-inserted * @inserted: number of transfers inserted * @inserted_transfers: array of spi_transfers of array-size @inserted, * that have been replacing replaced_transfers * * Note: that @extradata will point to @inserted_transfers[@inserted] * if some extra allocation is requested, so alignment will be the same * as for spi_transfers. */ struct spi_replaced_transfers { spi_replaced_release_t release; void *extradata; struct list_head replaced_transfers; struct list_head *replaced_after; size_t inserted; struct spi_transfer inserted_transfers[]; }; /*---------------------------------------------------------------------------*/ /* SPI transfer transformation methods */ extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, size_t maxwords); /*---------------------------------------------------------------------------*/ /* * All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. */ extern int spi_sync(struct spi_device *spi, struct spi_message *message); extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message); extern int spi_bus_lock(struct spi_controller *ctlr); extern int spi_bus_unlock(struct spi_controller *ctlr); /** * spi_sync_transfer - synchronous SPI data transfer * @spi: device with which data will be exchanged * @xfers: An array of spi_transfers * @num_xfers: Number of items in the xfer array * Context: can sleep * * Does a synchronous SPI data transfer of the given spi_transfer array. * * For more specific semantics see spi_sync(). * * Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, unsigned int num_xfers) { struct spi_message msg; spi_message_init_with_transfers(&msg, xfers, num_xfers); return spi_sync(spi, &msg); } /** * spi_write - SPI synchronous write * @spi: device to which data will be written * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function writes the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /** * spi_read - SPI synchronous read * @spi: device from which data will be read * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function reads the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * Callable only from contexts that can sleep. * * Return: the (unsigned) eight bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { ssize_t status; u8 result; status = spi_write_then_read(spi, &cmd, 1, &result, 1); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16 - SPI synchronous 8 bit write followed by 16 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * The number is returned in wire-order, which is at least sometimes * big-endian. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) { ssize_t status; u16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * This function is similar to spi_w8r16, with the exception that it will * convert the read 16 bit data word from big-endian to native endianness. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the device in CPU * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) { ssize_t status; __be16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); if (status < 0) return status; return be16_to_cpu(result); } /*---------------------------------------------------------------------------*/ /* * INTERFACE between board init code and SPI infrastructure. * * No SPI driver ever sees these SPI device table segments, but * it's how the SPI core (or adapters that get hotplugged) grows * the driver model tree. * * As a rule, SPI devices can't be probed. Instead, board init code * provides a table listing the devices which are present, with enough * information to bind and set up the device's driver. There's basic * support for non-static configurations too; enough to handle adding * parport adapters, or microcontrollers acting as USB-to-SPI bridges. */ /** * struct spi_board_info - board-specific template for a SPI device * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. * @swnode: Software node for the device. * @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits * from the chip datasheet and board-specific signal quality issues. * @bus_num: Identifies which spi_controller parents the spi_device; unused * by spi_new_device(), and otherwise depends on board wiring. * @chip_select: Initializes spi_device.chip_select; depends on how * the board is wired. * @mode: Initializes spi_device.mode; based on the chip datasheet, board * wiring (some devices support both 3WIRE and standard modes), and * possibly presence of an inverter in the chipselect path. * * When adding new SPI devices to the device tree, these structures serve * as a partial device template. They hold information which can't always * be determined by drivers. Information that probe() can establish (such * as the default transfer wordsize) is not included here. * * These structures are used in two places. Their primary role is to * be stored in tables of board-specific device descriptors, which are * declared early in board initialization and then used (much later) to * populate a controller's device tree after the that controller's driver * initializes. A secondary (and atypical) role is as a parameter to * spi_new_device() call, which happens after those controller drivers * are active in some dynamic board configuration models. */ struct spi_board_info { /* * The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, * IRQ is copied too. */ char modalias[SPI_NAME_SIZE]; const void *platform_data; const struct software_node *swnode; void *controller_data; int irq; /* Slower signaling on noisy or low voltage boards */ u32 max_speed_hz; /* * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; * it's less than num_chipselect. */ u16 bus_num; u16 chip_select; /* * mode becomes spi_device.mode, and is essential for chips * where the default of SPI_CS_HIGH = 0 is wrong. */ u32 mode; /* * ... may need additional spi_device chip config data here. * avoid stuff protocol drivers can set; but include stuff * needed to behave without being bound to a driver: * - quirks like clock rate mattering when not selected */ }; #ifdef CONFIG_SPI extern int spi_register_board_info(struct spi_board_info const *info, unsigned n); #else /* Board init code may ignore whether SPI is configured or not */ static inline int spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } #endif /* * If you're hotplugging an adapter with devices (parport, USB, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). * * You can also use spi_alloc_device() and spi_add_device() to use a two * stage registration sequence for each spi_device. This gives the caller * some more control over the spi_device structure before it is registered, * but requires that caller to initialize fields that would otherwise * be defined using the board info. */ extern struct spi_device * spi_alloc_device(struct spi_controller *ctlr); extern int spi_add_device(struct spi_device *spi); extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); extern const void * spi_get_device_match_data(const struct spi_device *sdev); static inline bool spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } #endif /* __LINUX_SPI_H */
23968 23977 23974 23917 23943 23905 23917 13979 13990 22504 22490 13979 14052 22589 16939 16997 22586 22480 22588 16994 16928 16932 16918 16930 16918 16998 16925 16998 22505 22486 22591 57 56 188 188 186 188 56 57 57 57 57 57 56 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 // SPDX-License-Identifier: GPL-2.0 #include <linux/debugfs.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/memblock.h> #include <linux/stacktrace.h> #include <linux/page_owner.h> #include <linux/jump_label.h> #include <linux/migrate.h> #include <linux/stackdepot.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> #include <linux/sched/clock.h> #include "internal.h" /* * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack) * to use off stack temporal storage */ #define PAGE_OWNER_STACK_DEPTH (16) struct page_owner { unsigned short order; short last_migrate_reason; gfp_t gfp_mask; depot_stack_handle_t handle; depot_stack_handle_t free_handle; u64 ts_nsec; u64 free_ts_nsec; char comm[TASK_COMM_LEN]; pid_t pid; pid_t tgid; pid_t free_pid; pid_t free_tgid; }; struct stack { struct stack_record *stack_record; struct stack *next; }; static struct stack dummy_stack; static struct stack failure_stack; static struct stack *stack_list; static DEFINE_SPINLOCK(stack_list_lock); static bool page_owner_enabled __initdata; DEFINE_STATIC_KEY_FALSE(page_owner_inited); static depot_stack_handle_t dummy_handle; static depot_stack_handle_t failure_handle; static depot_stack_handle_t early_handle; static void init_early_allocated_pages(void); static inline void set_current_in_page_owner(void) { /* * Avoid recursion. * * We might need to allocate more memory from page_owner code, so make * sure to signal it in order to avoid recursion. */ current->in_page_owner = 1; } static inline void unset_current_in_page_owner(void) { current->in_page_owner = 0; } static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); if (page_owner_enabled) stack_depot_request_early_init(); return ret; } early_param("page_owner", early_page_owner_param); static __init bool need_page_owner(void) { return page_owner_enabled; } static __always_inline depot_stack_handle_t create_dummy_stack(void) { unsigned long entries[4]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); return stack_depot_save(entries, nr_entries, GFP_KERNEL); } static noinline void register_dummy_stack(void) { dummy_handle = create_dummy_stack(); } static noinline void register_failure_stack(void) { failure_handle = create_dummy_stack(); } static noinline void register_early_stack(void) { early_handle = create_dummy_stack(); } static __init void init_page_owner(void) { if (!page_owner_enabled) return; register_dummy_stack(); register_failure_stack(); register_early_stack(); init_early_allocated_pages(); /* Initialize dummy and failure stacks and link them to stack_list */ dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle); failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle); if (dummy_stack.stack_record) refcount_set(&dummy_stack.stack_record->count, 1); if (failure_stack.stack_record) refcount_set(&failure_stack.stack_record->count, 1); dummy_stack.next = &failure_stack; stack_list = &dummy_stack; static_branch_enable(&page_owner_inited); } struct page_ext_operations page_owner_ops = { .size = sizeof(struct page_owner), .need = need_page_owner, .init = init_page_owner, .need_shared_flags = true, }; static inline struct page_owner *get_page_owner(struct page_ext *page_ext) { return page_ext_data(page_ext, &page_owner_ops); } static noinline depot_stack_handle_t save_stack(gfp_t flags) { unsigned long entries[PAGE_OWNER_STACK_DEPTH]; depot_stack_handle_t handle; unsigned int nr_entries; if (current->in_page_owner) return dummy_handle; set_current_in_page_owner(); nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; unset_current_in_page_owner(); return handle; } static void add_stack_record_to_list(struct stack_record *stack_record, gfp_t gfp_mask) { unsigned long flags; struct stack *stack; set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); if (!stack) { unset_current_in_page_owner(); return; } unset_current_in_page_owner(); stack->stack_record = stack_record; stack->next = NULL; spin_lock_irqsave(&stack_list_lock, flags); stack->next = stack_list; /* * This pairs with smp_load_acquire() from function * stack_start(). This guarantees that stack_start() * will see an updated stack_list before starting to * traverse the list. */ smp_store_release(&stack_list, stack); spin_unlock_irqrestore(&stack_list_lock, flags); } static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; /* * New stack_record's that do not use STACK_DEPOT_FLAG_GET start * with REFCOUNT_SATURATED to catch spurious increments of their * refcount. * Since we do not use STACK_DEPOT_FLAG_GET API, let us * set a refcount of 1 ourselves. */ if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) { int old = REFCOUNT_SATURATED; if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1)) /* Add the new stack_record to our list */ add_stack_record_to_list(stack_record, gfp_mask); } refcount_add(nr_base_pages, &stack_record->count); } static void dec_stack_record_count(depot_stack_handle_t handle, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; if (refcount_sub_and_test(nr_base_pages, &stack_record->count)) pr_warn("%s: refcount went to 0 for %u handle\n", __func__, handle); } static inline void __update_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, gfp_t gfp_mask, short last_migrate_reason, u64 ts_nsec, pid_t pid, pid_t tgid, char *comm) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); page_owner->handle = handle; page_owner->order = order; page_owner->gfp_mask = gfp_mask; page_owner->last_migrate_reason = last_migrate_reason; page_owner->pid = pid; page_owner->tgid = tgid; page_owner->ts_nsec = ts_nsec; strscpy(page_owner->comm, comm, sizeof(page_owner->comm)); __set_bit(PAGE_EXT_OWNER, &page_ext->flags); __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_ext = page_ext_next(page_ext); } } static inline void __update_page_owner_free_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, pid_t pid, pid_t tgid, u64 free_ts_nsec) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); /* Only __reset_page_owner() wants to clear the bit */ if (handle) { __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_owner->free_handle = handle; } page_owner->free_ts_nsec = free_ts_nsec; page_owner->free_pid = current->pid; page_owner->free_tgid = current->tgid; page_ext = page_ext_next(page_ext); } } void __reset_page_owner(struct page *page, unsigned short order) { struct page_ext *page_ext; depot_stack_handle_t handle; depot_stack_handle_t alloc_handle; struct page_owner *page_owner; u64 free_ts_nsec = local_clock(); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); alloc_handle = page_owner->handle; handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); __update_page_owner_free_handle(page_ext, handle, order, current->pid, current->tgid, free_ts_nsec); page_ext_put(page_ext); if (alloc_handle != early_handle) /* * early_handle is being set as a handle for all those * early allocated pages. See init_pages_in_zone(). * Since their refcount is not being incremented because * the machinery is not ready yet, we cannot decrement * their refcount either. */ dec_stack_record_count(alloc_handle, 1 << order); } noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext; u64 ts_nsec = local_clock(); depot_stack_handle_t handle; handle = save_stack(gfp_mask); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1, ts_nsec, current->pid, current->tgid, current->comm); page_ext_put(page_ext); inc_stack_record_count(handle, gfp_mask, 1 << order); } void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); page_owner->last_migrate_reason = reason; page_ext_put(page_ext); } void __split_page_owner(struct page *page, int old_order, int new_order) { int i; struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; for (i = 0; i < (1 << old_order); i++) { page_owner = get_page_owner(page_ext); page_owner->order = new_order; page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __folio_copy_owner(struct folio *newfolio, struct folio *old) { int i; struct page_ext *old_ext; struct page_ext *new_ext; struct page_owner *old_page_owner; struct page_owner *new_page_owner; depot_stack_handle_t migrate_handle; old_ext = page_ext_get(&old->page); if (unlikely(!old_ext)) return; new_ext = page_ext_get(&newfolio->page); if (unlikely(!new_ext)) { page_ext_put(old_ext); return; } old_page_owner = get_page_owner(old_ext); new_page_owner = get_page_owner(new_ext); migrate_handle = new_page_owner->handle; __update_page_owner_handle(new_ext, old_page_owner->handle, old_page_owner->order, old_page_owner->gfp_mask, old_page_owner->last_migrate_reason, old_page_owner->ts_nsec, old_page_owner->pid, old_page_owner->tgid, old_page_owner->comm); /* * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio * will be freed after migration. Keep them until then as they may be * useful. */ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order, old_page_owner->free_pid, old_page_owner->free_tgid, old_page_owner->free_ts_nsec); /* * We linked the original stack to the new folio, we need to do the same * for the new one and the old folio otherwise there will be an imbalance * when subtracting those pages from the stack. */ for (i = 0; i < (1 << new_page_owner->order); i++) { old_page_owner->handle = migrate_handle; old_ext = page_ext_next(old_ext); old_page_owner = get_page_owner(old_ext); } page_ext_put(new_ext); page_ext_put(old_ext); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; unsigned long pfn, block_end_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count[MIGRATE_TYPES] = { 0, }; int pageblock_mt, page_mt; int i; /* Scan block by block. First and last block may be incomplete */ pfn = zone->zone_start_pfn; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { page = pfn_to_online_page(pfn); if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) continue; if (PageBuddy(page)) { unsigned long freepage_order; freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); page_mt = gfp_migratetype(page_owner->gfp_mask); if (pageblock_mt != page_mt) { if (is_migrate_cma(pageblock_mt)) count[MIGRATE_MOVABLE]++; else count[pageblock_mt]++; pfn = block_end_pfn; page_ext_put(page_ext); break; } pfn += (1UL << page_owner->order) - 1; ext_put_continue: page_ext_put(page_ext); } } /* Print counts */ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (i = 0; i < MIGRATE_TYPES; i++) seq_printf(m, "%12lu ", count[i]); seq_putc(m, '\n'); } /* * Looking for memcg information and print it out */ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret, struct page *page) { #ifdef CONFIG_MEMCG unsigned long memcg_data; struct mem_cgroup *memcg; bool online; char name[80]; rcu_read_lock(); memcg_data = READ_ONCE(page->memcg_data); if (!memcg_data) goto out_unlock; if (memcg_data & MEMCG_DATA_OBJEXTS) ret += scnprintf(kbuf + ret, count - ret, "Slab cache page\n"); memcg = page_memcg_check(page); if (!memcg) goto out_unlock; online = (memcg->css.flags & CSS_ONLINE); cgroup_name(memcg->css.cgroup, name, sizeof(name)); ret += scnprintf(kbuf + ret, count - ret, "Charged %sto %smemcg %s\n", PageMemcgKmem(page) ? "(via objcg) " : "", online ? "" : "offline ", name); out_unlock: rcu_read_unlock(); #endif /* CONFIG_MEMCG */ return ret; } static ssize_t print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_owner *page_owner, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); kbuf = kmalloc(count, GFP_KERNEL); if (!kbuf) return -ENOMEM; ret = scnprintf(kbuf, count, "Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns\n", page_owner->order, page_owner->gfp_mask, &page_owner->gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec); /* Print information relevant to grouping pages by mobility */ pageblock_mt = get_pageblock_migratetype(page); page_mt = gfp_migratetype(page_owner->gfp_mask); ret += scnprintf(kbuf + ret, count - ret, "PFN 0x%lx type %s Block %lu type %s Flags %pGp\n", pfn, migratetype_names[page_mt], pfn >> pageblock_order, migratetype_names[pageblock_mt], &page->flags); ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); if (ret >= count) goto err; if (page_owner->last_migrate_reason != -1) { ret += scnprintf(kbuf + ret, count - ret, "Page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); } ret = print_page_owner_memcg(kbuf, count, ret, page); ret += snprintf(kbuf + ret, count - ret, "\n"); if (ret >= count) goto err; if (copy_to_user(buf, kbuf, ret)) ret = -EFAULT; kfree(kbuf); return ret; err: kfree(kbuf); return -ENOMEM; } void __dump_page_owner(const struct page *page) { struct page_ext *page_ext = page_ext_get((void *)page); struct page_owner *page_owner; depot_stack_handle_t handle; gfp_t gfp_mask; int mt; if (unlikely(!page_ext)) { pr_alert("There is not page extension available.\n"); return; } page_owner = get_page_owner(page_ext); gfp_mask = page_owner->gfp_mask; mt = gfp_migratetype(gfp_mask); if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not present (never set?)\n"); page_ext_put(page_ext); return; } if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) pr_alert("page_owner tracks the page as allocated\n"); else pr_alert("page_owner tracks the page as freed\n"); pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n", page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); if (!handle) pr_alert("page_owner allocation stack trace missing\n"); else stack_depot_print(handle); handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { pr_alert("page last free pid %d tgid %d stack trace:\n", page_owner->free_pid, page_owner->free_tgid); stack_depot_print(handle); } if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); page_ext_put(page_ext); } static ssize_t read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long pfn; struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; depot_stack_handle_t handle; if (!static_branch_unlikely(&page_owner_inited)) return -EINVAL; page = NULL; if (*ppos == 0) pfn = min_low_pfn; else pfn = *ppos; /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */ while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) pfn++; /* Find an allocated page */ for (; pfn < max_pfn; pfn++) { /* * This temporary page_owner is required so * that we can avoid the context switches while holding * the rcu lock and copying the page owner information to * user through copy_to_user() or GFP_KERNEL allocations. */ struct page_owner page_owner_tmp; /* * If the new page is in a new MAX_ORDER_NR_PAGES area, * validate the area as existing, skip it if not */ if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) { pfn += MAX_ORDER_NR_PAGES - 1; continue; } page = pfn_to_page(pfn); if (PageBuddy(page)) { unsigned long freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* * Some pages could be missed by concurrent allocation or free, * because we don't hold the zone lock. */ if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* * Although we do have the info about past allocation of free * pages, it's not relevant for current memory usage. */ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); /* * Don't print "tail" pages of high-order allocations as that * would inflate the stats. */ if (!IS_ALIGNED(pfn, 1 << page_owner->order)) goto ext_put_continue; /* * Access to page_ext->handle isn't synchronous so we should * be careful to access it. */ handle = READ_ONCE(page_owner->handle); if (!handle) goto ext_put_continue; /* Record the next PFN to read in the file offset */ *ppos = pfn + 1; page_owner_tmp = *page_owner; page_ext_put(page_ext); return print_page_owner(buf, count, pfn, page, &page_owner_tmp, handle); ext_put_continue: page_ext_put(page_ext); } return 0; } static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig) { switch (orig) { case SEEK_SET: file->f_pos = offset; break; case SEEK_CUR: file->f_pos += offset; break; default: return -EINVAL; } return file->f_pos; } static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) { unsigned long pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count = 0; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { unsigned long block_end_pfn; if (!pfn_valid(pfn)) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); for (; pfn < block_end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); struct page_ext *page_ext; if (page_zone(page) != zone) continue; /* * To avoid having to grab zone->lock, be a little * careful when reading buddy page order. The only * danger is that we skip too much and potentially miss * some early allocated pages, which is better than * heavy lock contention. */ if (PageBuddy(page)) { unsigned long order = buddy_order_unsafe(page); if (order > 0 && order <= MAX_PAGE_ORDER) pfn += (1UL << order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* Found early allocated page */ __update_page_owner_handle(page_ext, early_handle, 0, 0, -1, local_clock(), current->pid, current->tgid, current->comm); count++; ext_put_continue: page_ext_put(page_ext); } cond_resched(); } pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n", pgdat->node_id, zone->name, count); } static void init_zones_in_node(pg_data_t *pgdat) { struct zone *zone; struct zone *node_zones = pgdat->node_zones; for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { if (!populated_zone(zone)) continue; init_pages_in_zone(pgdat, zone); } } static void init_early_allocated_pages(void) { pg_data_t *pgdat; for_each_online_pgdat(pgdat) init_zones_in_node(pgdat); } static const struct file_operations proc_page_owner_operations = { .read = read_page_owner, .llseek = lseek_page_owner, }; static void *stack_start(struct seq_file *m, loff_t *ppos) { struct stack *stack; if (*ppos == -1UL) return NULL; if (!*ppos) { /* * This pairs with smp_store_release() from function * add_stack_record_to_list(), so we get a consistent * value of stack_list. */ stack = smp_load_acquire(&stack_list); m->private = stack; } else { stack = m->private; } return stack; } static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) { struct stack *stack = v; stack = stack->next; *ppos = stack ? *ppos + 1 : -1UL; m->private = stack; return stack; } static unsigned long page_owner_pages_threshold; static int stack_print(struct seq_file *m, void *v) { int i, nr_base_pages; struct stack *stack = v; unsigned long *entries; unsigned long nr_entries; struct stack_record *stack_record = stack->stack_record; if (!stack->stack_record) return 0; nr_entries = stack_record->size; entries = stack_record->entries; nr_base_pages = refcount_read(&stack_record->count) - 1; if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) return 0; for (i = 0; i < nr_entries; i++) seq_printf(m, " %pS\n", (void *)entries[i]); seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); return 0; } static void stack_stop(struct seq_file *m, void *v) { } static const struct seq_operations page_owner_stack_op = { .start = stack_start, .next = stack_next, .stop = stack_stop, .show = stack_print }; static int page_owner_stack_open(struct inode *inode, struct file *file) { return seq_open_private(file, &page_owner_stack_op, 0); } static const struct file_operations page_owner_stack_operations = { .open = page_owner_stack_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static int page_owner_threshold_get(void *data, u64 *val) { *val = READ_ONCE(page_owner_pages_threshold); return 0; } static int page_owner_threshold_set(void *data, u64 val) { WRITE_ONCE(page_owner_pages_threshold, val); return 0; } DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get, &page_owner_threshold_set, "%llu"); static int __init pageowner_init(void) { struct dentry *dir; if (!static_branch_unlikely(&page_owner_inited)) { pr_info("page_owner is disabled\n"); return 0; } debugfs_create_file("page_owner", 0400, NULL, NULL, &proc_page_owner_operations); dir = debugfs_create_dir("page_owner_stacks", NULL); debugfs_create_file("show_stacks", 0400, dir, NULL, &page_owner_stack_operations); debugfs_create_file("count_threshold", 0600, dir, NULL, &proc_page_owner_threshold); return 0; } late_initcall(pageowner_init)
5 7 7 7 1 6 6 7 3 3 3 2 1 1 1 5 2 1 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2004-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_dir2.h" #include "xfs_export.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_icache.h" #include "xfs_pnfs.h" /* * Note that we only accept fileids which are long enough rather than allow * the parent generation number to default to zero. XFS considers zero a * valid generation number not an invalid/wildcard value. */ static int xfs_fileid_length(int fileid_type) { switch (fileid_type) { case FILEID_INO32_GEN: return 2; case FILEID_INO32_GEN_PARENT: return 4; case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: return 3; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: return 6; } return FILEID_INVALID; } STATIC int xfs_fs_encode_fh( struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct xfs_mount *mp = XFS_M(inode->i_sb); struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; int fileid_type; int len; /* Directories don't need their parent encoded, they have ".." */ if (!parent) fileid_type = FILEID_INO32_GEN; else fileid_type = FILEID_INO32_GEN_PARENT; /* * If the filesystem may contain 64bit inode numbers, we need * to use larger file handles that can represent them. * * While we only allocate inodes that do not fit into 32 bits any * large enough filesystem may contain them, thus the slightly * confusing looking conditional below. */ if (!xfs_has_small_inums(mp) || xfs_is_inode32(mp)) fileid_type |= XFS_FILEID_TYPE_64FLAG; /* * Only encode if there is enough space given. In practice * this means we can't export a filesystem with 64bit inodes * over NFSv2 with the subtree_check export option; the other * seven combinations work. The real answer is "don't use v2". */ len = xfs_fileid_length(fileid_type); if (*max_len < len) { *max_len = len; return FILEID_INVALID; } *max_len = len; switch (fileid_type) { case FILEID_INO32_GEN_PARENT: fid->i32.parent_ino = XFS_I(parent)->i_ino; fid->i32.parent_gen = parent->i_generation; fallthrough; case FILEID_INO32_GEN: fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: fid64->parent_ino = XFS_I(parent)->i_ino; fid64->parent_gen = parent->i_generation; fallthrough; case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: fid64->ino = XFS_I(inode)->i_ino; fid64->gen = inode->i_generation; break; } return fileid_type; } struct inode * xfs_nfs_get_inode( struct super_block *sb, u64 ino, u32 generation) { xfs_mount_t *mp = XFS_M(sb); xfs_inode_t *ip; int error; /* * NFS can sometimes send requests for ino 0. Fail them gracefully. */ if (ino == 0) return ERR_PTR(-ESTALE); /* * The XFS_IGET_UNTRUSTED means that an invalid inode number is just * fine and not an indication of a corrupted filesystem as clients can * send invalid file handles and we have to handle it gracefully.. */ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); if (error) { /* * EINVAL means the inode cluster doesn't exist anymore. * EFSCORRUPTED means the metadata pointing to the inode cluster * or the inode cluster itself is corrupt. This implies the * filehandle is stale, so we should translate it here. * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. */ switch (error) { case -EINVAL: case -ENOENT: case -EFSCORRUPTED: error = -ESTALE; break; default: break; } return ERR_PTR(error); } /* * Reload the incore unlinked list to avoid failure in inodegc. * Use an unlocked check here because unrecovered unlinked inodes * should be somewhat rare. */ if (xfs_inode_unlinked_incomplete(ip)) { error = xfs_inode_reload_unlinked(ip); if (error) { xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_irele(ip); return ERR_PTR(error); } } if (VFS_I(ip)->i_generation != generation || IS_PRIVATE(VFS_I(ip))) { xfs_irele(ip); return ERR_PTR(-ESTALE); } return VFS_I(ip); } STATIC struct dentry * xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fileid_type) { struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; if (fh_len < xfs_fileid_length(fileid_type)) return NULL; switch (fileid_type) { case FILEID_INO32_GEN_PARENT: case FILEID_INO32_GEN: inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); break; } return d_obtain_alias(inode); } STATIC struct dentry * xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fileid_type) { struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; if (fh_len < xfs_fileid_length(fileid_type)) return NULL; switch (fileid_type) { case FILEID_INO32_GEN_PARENT: inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, fid->i32.parent_gen); break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: inode = xfs_nfs_get_inode(sb, fid64->parent_ino, fid64->parent_gen); break; } return d_obtain_alias(inode); } STATIC struct dentry * xfs_fs_get_parent( struct dentry *child) { int error; struct xfs_inode *cip; error = xfs_lookup(XFS_I(d_inode(child)), &xfs_name_dotdot, &cip, NULL); if (unlikely(error)) return ERR_PTR(error); return d_obtain_alias(VFS_I(cip)); } STATIC int xfs_fs_nfs_commit_metadata( struct inode *inode) { return xfs_log_force_inode(XFS_I(inode)); } const struct export_operations xfs_export_operations = { .encode_fh = xfs_fs_encode_fh, .fh_to_dentry = xfs_fs_fh_to_dentry, .fh_to_parent = xfs_fs_fh_to_parent, .get_parent = xfs_fs_get_parent, .commit_metadata = xfs_fs_nfs_commit_metadata, #ifdef CONFIG_EXPORTFS_BLOCK_OPS .get_uuid = xfs_fs_get_uuid, .map_blocks = xfs_fs_map_blocks, .commit_blocks = xfs_fs_commit_blocks, #endif };
51 51 15 17 11 10 1 11 131 130 107 106 128 26 26 27 22 27 26 23 6 26 26 1 26 9 25 27 2 2 6 16 16 16 7 6 6 6 6 7 7 11 11 10 6 4 9 5 236 30 209 4 208 209 208 208 1 203 237 16 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2021 Intel Corporation * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES * * iommufd provides control over the IOMMU HW objects created by IOMMU kernel * drivers. IOMMU HW objects revolve around IO page tables that map incoming DMA * addresses (IOVA) to CPU addresses. */ #define pr_fmt(fmt) "iommufd: " fmt #include <linux/bug.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/iommufd.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h> #include "io_pagetable.h" #include "iommufd_private.h" #include "iommufd_test.h" struct iommufd_object_ops { void (*destroy)(struct iommufd_object *obj); void (*abort)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; /* * Allow concurrent access to the object. * * Once another thread can see the object pointer it can prevent object * destruction. Expect for special kernel-only objects there is no in-kernel way * to reliably destroy a single object. Thus all APIs that are creating objects * must use iommufd_object_abort() to handle their errors and only call * iommufd_object_finalize() once object creation cannot fail. */ void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj) { XA_STATE(xas, &ictx->objects, obj->id); void *old; xa_lock(&ictx->objects); old = xas_store(&xas, obj); xa_unlock(&ictx->objects); /* obj->id was returned from xa_alloc() so the xas_store() cannot fail */ WARN_ON(old != XA_ZERO_ENTRY); } /* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */ void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) { XA_STATE(xas, &ictx->objects, obj->id); void *old; xa_lock(&ictx->objects); old = xas_store(&xas, NULL); xa_unlock(&ictx->objects); WARN_ON(old != XA_ZERO_ENTRY); kfree(obj); } /* * Abort an object that has been fully initialized and needs destroy, but has * not been finalized. */ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj) { if (iommufd_object_ops[obj->type].abort) iommufd_object_ops[obj->type].abort(obj); else iommufd_object_ops[obj->type].destroy(obj); iommufd_object_abort(ictx, obj); } struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, enum iommufd_object_type type) { struct iommufd_object *obj; if (iommufd_should_fail()) return ERR_PTR(-ENOENT); xa_lock(&ictx->objects); obj = xa_load(&ictx->objects, id); if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) || !iommufd_lock_obj(obj)) obj = ERR_PTR(-ENOENT); xa_unlock(&ictx->objects); return obj; } static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, struct iommufd_object *to_destroy) { if (refcount_dec_and_test(&to_destroy->shortterm_users)) return 0; if (wait_event_timeout(ictx->destroy_wait, refcount_read(&to_destroy->shortterm_users) == 0, msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); refcount_inc(&to_destroy->shortterm_users); return -EBUSY; } /* * Remove the given object id from the xarray if the only reference to the * object is held by the xarray. */ int iommufd_object_remove(struct iommufd_ctx *ictx, struct iommufd_object *to_destroy, u32 id, unsigned int flags) { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); bool zerod_shortterm = false; int ret; /* * The purpose of the shortterm_users is to ensure deterministic * destruction of objects used by external drivers and destroyed by this * function. Any temporary increment of the refcount must increment * shortterm_users, such as during ioctl execution. */ if (flags & REMOVE_WAIT_SHORTTERM) { ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); if (ret) { /* * We have a bug. Put back the callers reference and * defer cleaning this object until close. */ refcount_dec(&to_destroy->users); return ret; } zerod_shortterm = true; } xa_lock(&ictx->objects); obj = xas_load(&xas); if (to_destroy) { /* * If the caller is holding a ref on obj we put it here under * the spinlock. */ refcount_dec(&obj->users); if (WARN_ON(obj != to_destroy)) { ret = -ENOENT; goto err_xa; } } else if (xa_is_zero(obj) || !obj) { ret = -ENOENT; goto err_xa; } if (!refcount_dec_if_one(&obj->users)) { ret = -EBUSY; goto err_xa; } xas_store(&xas, NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; xa_unlock(&ictx->objects); /* * Since users is zero any positive users_shortterm must be racing * iommufd_put_object(), or we have a bug. */ if (!zerod_shortterm) { ret = iommufd_object_dec_wait_shortterm(ictx, obj); if (WARN_ON(ret)) return ret; } iommufd_object_ops[obj->type].destroy(obj); kfree(obj); return 0; err_xa: if (zerod_shortterm) { /* Restore the xarray owned reference */ refcount_set(&obj->shortterm_users, 1); } xa_unlock(&ictx->objects); /* The returned object reference count is zero */ return ret; } static int iommufd_destroy(struct iommufd_ucmd *ucmd) { struct iommu_destroy *cmd = ucmd->cmd; return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0); } static int iommufd_fops_open(struct inode *inode, struct file *filp) { struct iommufd_ctx *ictx; ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT); if (!ictx) return -ENOMEM; /* * For compatibility with VFIO when /dev/vfio/vfio is opened we default * to the same rlimit accounting as vfio uses. */ if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) && filp->private_data == &vfio_misc_dev) { ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n"); } init_rwsem(&ictx->ioas_creation_lock); xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; init_waitqueue_head(&ictx->destroy_wait); filp->private_data = ictx; return 0; } static int iommufd_fops_release(struct inode *inode, struct file *filp) { struct iommufd_ctx *ictx = filp->private_data; struct iommufd_object *obj; /* * The objects in the xarray form a graph of "users" counts, and we have * to destroy them in a depth first manner. Leaf objects will reduce the * users count of interior objects when they are destroyed. * * Repeatedly destroying all the "1 users" leaf objects will progress * until the entire list is destroyed. If this can't progress then there * is some bug related to object refcounting. */ while (!xa_empty(&ictx->objects)) { unsigned int destroyed = 0; unsigned long index; xa_for_each(&ictx->objects, index, obj) { if (!refcount_dec_if_one(&obj->users)) continue; destroyed++; xa_erase(&ictx->objects, index); iommufd_object_ops[obj->type].destroy(obj); kfree(obj); } /* Bug related to users refcount */ if (WARN_ON(!destroyed)) break; } WARN_ON(!xa_empty(&ictx->groups)); kfree(ictx); return 0; } static int iommufd_option(struct iommufd_ucmd *ucmd) { struct iommu_option *cmd = ucmd->cmd; int rc; if (cmd->__reserved) return -EOPNOTSUPP; switch (cmd->option_id) { case IOMMU_OPTION_RLIMIT_MODE: rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx); break; case IOMMU_OPTION_HUGE_PAGES: rc = iommufd_ioas_option(ucmd); break; default: return -EOPNOTSUPP; } if (rc) return rc; if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64, &cmd->val64, sizeof(cmd->val64))) return -EFAULT; return 0; } union ucmd_buffer { struct iommu_destroy destroy; struct iommu_fault_alloc fault; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_invalidate cache; struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; struct iommu_ioas_iova_ranges iova_ranges; struct iommu_ioas_map map; struct iommu_ioas_unmap unmap; struct iommu_option option; struct iommu_vdevice_alloc vdev; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif }; struct iommufd_ioctl_op { unsigned int size; unsigned int min_size; unsigned int ioctl_num; int (*execute)(struct iommufd_ucmd *ucmd); }; #define IOCTL_OP(_ioctl, _fn, _struct, _last) \ [_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = { \ .size = sizeof(_struct) + \ BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ sizeof(_struct)), \ .min_size = offsetofend(_struct, _last), \ .ioctl_num = _ioctl, \ .execute = _fn, \ } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, struct iommu_hwpt_get_dirty_bitmap, data), IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate, struct iommu_hwpt_invalidate, __reserved), IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, struct iommu_hwpt_set_dirty_tracking, __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, struct iommu_ioas_allow_iovas, allowed_iovas), IOCTL_OP(IOMMU_IOAS_CHANGE_PROCESS, iommufd_ioas_change_process, struct iommu_ioas_change_process, __reserved), IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy, src_iova), IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, struct iommu_ioas_iova_ranges, out_iova_alignment), IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova), IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file, struct iommu_ioas_map_file, iova), IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, length), IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, struct iommu_vdevice_alloc, virt_id), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif }; static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct iommufd_ctx *ictx = filp->private_data; const struct iommufd_ioctl_op *op; struct iommufd_ucmd ucmd = {}; union ucmd_buffer buf; unsigned int nr; int ret; nr = _IOC_NR(cmd); if (nr < IOMMUFD_CMD_BASE || (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops)) return iommufd_vfio_ioctl(ictx, cmd, arg); ucmd.ictx = ictx; ucmd.ubuffer = (void __user *)arg; ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); if (ret) return ret; op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE]; if (op->ioctl_num != cmd) return -ENOIOCTLCMD; if (ucmd.user_size < op->min_size) return -EINVAL; ucmd.cmd = &buf; ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, ucmd.user_size); if (ret) return ret; ret = op->execute(&ucmd); return ret; } static const struct file_operations iommufd_fops = { .owner = THIS_MODULE, .open = iommufd_fops_open, .release = iommufd_fops_release, .unlocked_ioctl = iommufd_fops_ioctl, }; /** * iommufd_ctx_get - Get a context reference * @ictx: Context to get * * The caller must already hold a valid reference to ictx. */ void iommufd_ctx_get(struct iommufd_ctx *ictx) { get_file(ictx->file); } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, "IOMMUFD"); /** * iommufd_ctx_from_file - Acquires a reference to the iommufd context * @file: File to obtain the reference from * * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file * remains owned by the caller and the caller must still do fput. On success * the caller is responsible to call iommufd_ctx_put(). */ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { struct iommufd_ctx *ictx; if (file->f_op != &iommufd_fops) return ERR_PTR(-EBADFD); ictx = file->private_data; iommufd_ctx_get(ictx); return ictx; } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, "IOMMUFD"); /** * iommufd_ctx_from_fd - Acquires a reference to the iommufd context * @fd: File descriptor to obtain the reference from * * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success * the caller is responsible to call iommufd_ctx_put(). */ struct iommufd_ctx *iommufd_ctx_from_fd(int fd) { struct file *file; file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &iommufd_fops) { fput(file); return ERR_PTR(-EBADFD); } /* fget is the same as iommufd_ctx_get() */ return file->private_data; } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, "IOMMUFD"); /** * iommufd_ctx_put - Put back a reference * @ictx: Context to put back */ void iommufd_ctx_put(struct iommufd_ctx *ictx) { fput(ictx->file); } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, "IOMMUFD"); static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_ACCESS] = { .destroy = iommufd_access_destroy_object, }, [IOMMUFD_OBJ_DEVICE] = { .destroy = iommufd_device_destroy, }, [IOMMUFD_OBJ_FAULT] = { .destroy = iommufd_fault_destroy, }, [IOMMUFD_OBJ_HWPT_PAGING] = { .destroy = iommufd_hwpt_paging_destroy, .abort = iommufd_hwpt_paging_abort, }, [IOMMUFD_OBJ_HWPT_NESTED] = { .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, [IOMMUFD_OBJ_IOAS] = { .destroy = iommufd_ioas_destroy, }, [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, }, [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy, }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, }, #endif }; static struct miscdevice iommu_misc_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "iommu", .fops = &iommufd_fops, .nodename = "iommu", .mode = 0660, }; static struct miscdevice vfio_misc_dev = { .minor = VFIO_MINOR, .name = "vfio", .fops = &iommufd_fops, .nodename = "vfio/vfio", .mode = 0666, }; static int __init iommufd_init(void) { int ret; ret = misc_register(&iommu_misc_dev); if (ret) return ret; if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) { ret = misc_register(&vfio_misc_dev); if (ret) goto err_misc; } ret = iommufd_test_init(); if (ret) goto err_vfio_misc; return 0; err_vfio_misc: if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) misc_deregister(&vfio_misc_dev); err_misc: misc_deregister(&iommu_misc_dev); return ret; } static void __exit iommufd_exit(void) { iommufd_test_exit(); if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) misc_deregister(&vfio_misc_dev); misc_deregister(&iommu_misc_dev); } module_init(iommufd_init); module_exit(iommufd_exit); #if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); MODULE_IMPORT_NS("IOMMUFD"); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL");
9 8 7 9 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* Kernel module to match connection tracking byte counter. * GPL (C) 2002 Martin Devera (devik@cdi.cz). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/math64.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching"); MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; acct = nf_conn_acct_find(ct); if (!acct) return false; counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } break; case XT_CONNBYTES_BYTES: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); break; case XT_CONNBYTES_DIR_REPLY: what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; case XT_CONNBYTES_DIR_BOTH: what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; } break; case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } if (pkts != 0) what = div64_u64(bytes, pkts); break; } if (sinfo->count.to >= sinfo->count.from) return what <= sinfo->count.to && what >= sinfo->count.from; else /* inverted */ return what < sinfo->count.to || what > sinfo->count.from; } static int connbytes_mt_check(const struct xt_mtchk_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; int ret; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && sinfo->what != XT_CONNBYTES_AVGPKT) return -EINVAL; if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && sinfo->direction != XT_CONNBYTES_DIR_REPLY && sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } /* * This filter cannot function correctly unless connection tracking * accounting is enabled, so complain in the hope that someone notices. */ if (!nf_ct_acct_enabled(par->net)) { pr_warn("Forcing CT accounting to be enabled\n"); nf_ct_set_acct(par->net, true); } return ret; } static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match connbytes_mt_reg __read_mostly = { .name = "connbytes", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = connbytes_mt_check, .match = connbytes_mt, .destroy = connbytes_mt_destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE, }; static int __init connbytes_mt_init(void) { return xt_register_match(&connbytes_mt_reg); } static void __exit connbytes_mt_exit(void) { xt_unregister_match(&connbytes_mt_reg); } module_init(connbytes_mt_init); module_exit(connbytes_mt_exit);
1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 // SPDX-License-Identifier: GPL-2.0+ /* * Infinity Unlimited USB Phoenix driver * * Copyright (C) 2010 James Courtier-Dutton (James@superbug.co.uk) * Copyright (C) 2007 Alain Degreffe (eczema@ecze.com) * * Original code taken from iuutool (Copyright (C) 2006 Juan Carlos Borrás) * * And tested with help of WB Electronics */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include "iuu_phoenix.h" #include <linux/random.h> #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver" static const struct usb_device_id id_table[] = { {USB_DEVICE(IUU_USB_VENDOR_ID, IUU_USB_PRODUCT_ID)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); /* turbo parameter */ static int boost = 100; static int clockmode = 1; static int cdmode = 1; static int iuu_cardin; static int iuu_cardout; static bool xmas; static int vcc_default = 5; static int iuu_create_sysfs_attrs(struct usb_serial_port *port); static int iuu_remove_sysfs_attrs(struct usb_serial_port *port); static void read_rxcmd_callback(struct urb *urb); struct iuu_private { spinlock_t lock; /* store irq state */ u8 line_status; int tiostatus; /* store IUART SIGNAL for tiocmget call */ u8 reset; /* if 1 reset is needed */ int poll; /* number of poll */ u8 *writebuf; /* buffer for writing to device */ int writelen; /* num of byte to write to device */ u8 *buf; /* used for initialize speed */ u8 len; int vcc; /* vcc (either 3 or 5 V) */ u32 boost; u32 clk; }; static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; int ret; priv = kzalloc(sizeof(struct iuu_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->buf = kzalloc(256, GFP_KERNEL); if (!priv->buf) { kfree(priv); return -ENOMEM; } priv->writebuf = kzalloc(256, GFP_KERNEL); if (!priv->writebuf) { kfree(priv->buf); kfree(priv); return -ENOMEM; } priv->vcc = vcc_default; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); ret = iuu_create_sysfs_attrs(port); if (ret) { kfree(priv->writebuf); kfree(priv->buf); kfree(priv); return ret; } return 0; } static void iuu_port_remove(struct usb_serial_port *port) { struct iuu_private *priv = usb_get_serial_port_data(port); iuu_remove_sysfs_attrs(port); kfree(priv->writebuf); kfree(priv->buf); kfree(priv); } static int iuu_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; /* FIXME: locking on tiomstatus */ dev_dbg(&port->dev, "%s msg : SET = 0x%04x, CLEAR = 0x%04x\n", __func__, set, clear); spin_lock_irqsave(&priv->lock, flags); if ((set & TIOCM_RTS) && !(priv->tiostatus == TIOCM_RTS)) { dev_dbg(&port->dev, "%s TIOCMSET RESET called !!!\n", __func__); priv->reset = 1; } if (set & TIOCM_RTS) priv->tiostatus = TIOCM_RTS; spin_unlock_irqrestore(&priv->lock, flags); return 0; } /* This is used to provide a carrier detect mechanism * When a card is present, the response is 0x00 * When no card , the reader respond with TIOCM_CD * This is known as CD autodetect mechanism */ static int iuu_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int rc; spin_lock_irqsave(&priv->lock, flags); rc = priv->tiostatus; spin_unlock_irqrestore(&priv->lock, flags); return rc; } static void iuu_rxcmd(struct urb *urb) { struct usb_serial_port *port = urb->context; int status = urb->status; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } memset(port->write_urb->transfer_buffer, IUU_UART_RX, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, read_rxcmd_callback, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static int iuu_reset(struct usb_serial_port *port, u8 wt) { struct iuu_private *priv = usb_get_serial_port_data(port); int result; char *buf_ptr = port->write_urb->transfer_buffer; /* Prepare the reset sequence */ *buf_ptr++ = IUU_RST_SET; *buf_ptr++ = IUU_DELAY_MS; *buf_ptr++ = wt; *buf_ptr = IUU_RST_CLEAR; /* send the sequence */ usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 4, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); priv->reset = 0; return result; } /* Status Function * Return value is * 0x00 = no card * 0x01 = smartcard * 0x02 = sim card */ static void iuu_update_status_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct iuu_private *priv = usb_get_serial_port_data(port); u8 *st; int status = urb->status; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } st = urb->transfer_buffer; dev_dbg(&port->dev, "%s - enter\n", __func__); if (urb->actual_length == 1) { switch (st[0]) { case 0x1: priv->tiostatus = iuu_cardout; break; case 0x0: priv->tiostatus = iuu_cardin; break; default: priv->tiostatus = iuu_cardin; } } iuu_rxcmd(urb); } static void iuu_status_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; int status = urb->status; dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, 256, iuu_update_status_callback, port); usb_submit_urb(port->read_urb, GFP_ATOMIC); } static int iuu_status(struct usb_serial_port *port) { int result; memset(port->write_urb->transfer_buffer, IUU_GET_STATE_REGISTER, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, iuu_status_callback, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); return result; } static int bulk_immediate(struct usb_serial_port *port, u8 *buf, u8 count) { int status; struct usb_serial *serial = port->serial; int actual = 0; /* send the data out the bulk port */ status = usb_bulk_msg(serial->dev, usb_sndbulkpipe(serial->dev, port->bulk_out_endpointAddress), buf, count, &actual, 1000); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - error = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - write OK !\n", __func__); return status; } static int read_immediate(struct usb_serial_port *port, u8 *buf, u8 count) { int status; struct usb_serial *serial = port->serial; int actual = 0; /* send the data out the bulk port */ status = usb_bulk_msg(serial->dev, usb_rcvbulkpipe(serial->dev, port->bulk_in_endpointAddress), buf, count, &actual, 1000); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - error = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - read OK !\n", __func__); return status; } static int iuu_led(struct usb_serial_port *port, unsigned int R, unsigned int G, unsigned int B, u8 f) { int status; u8 *buf; buf = kmalloc(8, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_SET_LED; buf[1] = R & 0xFF; buf[2] = (R >> 8) & 0xFF; buf[3] = G & 0xFF; buf[4] = (G >> 8) & 0xFF; buf[5] = B & 0xFF; buf[6] = (B >> 8) & 0xFF; buf[7] = f; status = bulk_immediate(port, buf, 8); kfree(buf); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - led error status = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - led OK !\n", __func__); return IUU_OPERATION_OK; } static void iuu_rgbf_fill_buffer(u8 *buf, u8 r1, u8 r2, u8 g1, u8 g2, u8 b1, u8 b2, u8 freq) { *buf++ = IUU_SET_LED; *buf++ = r1; *buf++ = r2; *buf++ = g1; *buf++ = g2; *buf++ = b1; *buf++ = b2; *buf = freq; } static void iuu_led_activity_on(struct urb *urb) { struct usb_serial_port *port = urb->context; char *buf_ptr = port->write_urb->transfer_buffer; if (xmas) { buf_ptr[0] = IUU_SET_LED; get_random_bytes(buf_ptr + 1, 6); buf_ptr[7] = 1; } else { iuu_rgbf_fill_buffer(buf_ptr, 255, 255, 0, 0, 0, 0, 255); } usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 8 , iuu_rxcmd, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static void iuu_led_activity_off(struct urb *urb) { struct usb_serial_port *port = urb->context; char *buf_ptr = port->write_urb->transfer_buffer; if (xmas) { iuu_rxcmd(urb); return; } iuu_rgbf_fill_buffer(buf_ptr, 0, 0, 255, 255, 0, 0, 255); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 8 , iuu_rxcmd, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static int iuu_clk(struct usb_serial_port *port, int dwFrq) { int status; struct iuu_private *priv = usb_get_serial_port_data(port); int Count = 0; u8 FrqGenAdr = 0x69; u8 DIV = 0; /* 8bit */ u8 XDRV = 0; /* 8bit */ u8 PUMP = 0; /* 3bit */ u8 PBmsb = 0; /* 2bit */ u8 PBlsb = 0; /* 8bit */ u8 PO = 0; /* 1bit */ u8 Q = 0; /* 7bit */ /* 24bit = 3bytes */ unsigned int P = 0; unsigned int P2 = 0; int frq = (int)dwFrq; if (frq == 0) { priv->buf[Count++] = IUU_UART_WRITE_I2C; priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x09; priv->buf[Count++] = 0x00; status = bulk_immediate(port, (u8 *) priv->buf, Count); if (status != 0) { dev_dbg(&port->dev, "%s - write error\n", __func__); return status; } } else if (frq == 3579000) { DIV = 100; P = 1193; Q = 40; XDRV = 0; } else if (frq == 3680000) { DIV = 105; P = 161; Q = 5; XDRV = 0; } else if (frq == 6000000) { DIV = 66; P = 66; Q = 2; XDRV = 0x28; } else { unsigned int result = 0; unsigned int tmp = 0; unsigned int check; unsigned int check2; char found = 0x00; unsigned int lQ = 2; unsigned int lP = 2055; unsigned int lDiv = 4; for (lQ = 2; lQ <= 47 && !found; lQ++) for (lP = 2055; lP >= 8 && !found; lP--) for (lDiv = 4; lDiv <= 127 && !found; lDiv++) { tmp = (12000000 / lDiv) * (lP / lQ); if (abs((int)(tmp - frq)) < abs((int)(frq - result))) { check2 = (12000000 / lQ); if (check2 < 250000) continue; check = (12000000 / lQ) * lP; if (check > 400000000) continue; if (check < 100000000) continue; if (lDiv < 4 || lDiv > 127) continue; result = tmp; P = lP; DIV = lDiv; Q = lQ; if (result == frq) found = 0x01; } } } P2 = ((P - PO) / 2) - 4; PUMP = 0x04; PBmsb = (P2 >> 8 & 0x03); PBlsb = P2 & 0xFF; PO = (P >> 10) & 0x01; Q = Q - 2; priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x09; priv->buf[Count++] = 0x20; /* Adr = 0x09 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x0C; priv->buf[Count++] = DIV; /* Adr = 0x0C */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x12; priv->buf[Count++] = XDRV; /* Adr = 0x12 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x13; priv->buf[Count++] = 0x6B; /* Adr = 0x13 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x40; priv->buf[Count++] = (0xC0 | ((PUMP & 0x07) << 2)) | (PBmsb & 0x03); /* Adr = 0x40 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x41; priv->buf[Count++] = PBlsb; /* Adr = 0x41 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x42; priv->buf[Count++] = Q | (((PO & 0x01) << 7)); /* Adr = 0x42 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x44; priv->buf[Count++] = (char)0xFF; /* Adr = 0x44 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x45; priv->buf[Count++] = (char)0xFE; /* Adr = 0x45 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x46; priv->buf[Count++] = 0x7F; /* Adr = 0x46 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x47; priv->buf[Count++] = (char)0x84; /* Adr = 0x47 */ status = bulk_immediate(port, (u8 *) priv->buf, Count); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - write error\n", __func__); return status; } static int iuu_uart_flush(struct usb_serial_port *port) { struct device *dev = &port->dev; int i; int status; u8 *rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; rxcmd = kmalloc(1, GFP_KERNEL); if (!rxcmd) return -ENOMEM; rxcmd[0] = IUU_UART_RX; for (i = 0; i < 2; i++) { status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); goto out_free; } if (priv->len > 0) { dev_dbg(dev, "%s - uart_flush datalen is : %i\n", __func__, priv->len); status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); out_free: kfree(rxcmd); return status; } static void read_buf_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; int status = urb->status; if (status) { if (status == -EPROTO) { /* reschedule needed */ } return; } dev_dbg(&port->dev, "%s - %i chars to write\n", __func__, urb->actual_length); if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } iuu_led_activity_on(urb); } static int iuu_bulk_write(struct usb_serial_port *port) { struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int result; int buf_len; char *buf_ptr = port->write_urb->transfer_buffer; spin_lock_irqsave(&priv->lock, flags); *buf_ptr++ = IUU_UART_ESC; *buf_ptr++ = IUU_UART_TX; *buf_ptr++ = priv->writelen; memcpy(buf_ptr, priv->writebuf, priv->writelen); buf_len = priv->writelen; priv->writelen = 0; spin_unlock_irqrestore(&priv->lock, flags); dev_dbg(&port->dev, "%s - writing %i chars : %*ph\n", __func__, buf_len, buf_len, buf_ptr); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, buf_len + 3, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); usb_serial_port_softint(port); return result; } static int iuu_read_buf(struct usb_serial_port *port, int len) { int result; usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, len, read_buf_callback, port); result = usb_submit_urb(port->read_urb, GFP_ATOMIC); return result; } static void iuu_uart_read_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int status = urb->status; int len = 0; unsigned char *data = urb->transfer_buffer; priv->poll++; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } if (urb->actual_length == 1) len = (int) data[0]; if (urb->actual_length > 1) { dev_dbg(&port->dev, "%s - urb->actual_length = %i\n", __func__, urb->actual_length); return; } /* if len > 0 call readbuf */ if (len > 0) { dev_dbg(&port->dev, "%s - call read buf - len to read is %i\n", __func__, len); status = iuu_read_buf(port, len); return; } /* need to update status ? */ if (priv->poll > 99) { status = iuu_status(port); priv->poll = 0; return; } /* reset waiting ? */ if (priv->reset == 1) { status = iuu_reset(port, 0xC); return; } /* Writebuf is waiting */ spin_lock_irqsave(&priv->lock, flags); if (priv->writelen > 0) { spin_unlock_irqrestore(&priv->lock, flags); status = iuu_bulk_write(port); return; } spin_unlock_irqrestore(&priv->lock, flags); /* if nothing to write call again rxcmd */ dev_dbg(&port->dev, "%s - rxcmd recall\n", __func__); iuu_led_activity_off(urb); } static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port, const u8 *buf, int count) { struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); count = min(count, 256 - priv->writelen); if (count == 0) goto out; /* fill the buffer */ memcpy(priv->writebuf + priv->writelen, buf, count); priv->writelen += count; out: spin_unlock_irqrestore(&priv->lock, flags); return count; } static void read_rxcmd_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; int result; int status = urb->status; if (status) { /* error stop all */ return; } usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, 256, iuu_uart_read_callback, port); result = usb_submit_urb(port->read_urb, GFP_ATOMIC); dev_dbg(&port->dev, "%s - submit result = %d\n", __func__, result); } static int iuu_uart_on(struct usb_serial_port *port) { int status; u8 *buf; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_UART_ENABLE; buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF); buf[2] = (u8) (0x00FF & IUU_BAUD_9600); buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN); status = bulk_immediate(port, buf, 4); if (status != IUU_OPERATION_OK) { dev_dbg(&port->dev, "%s - uart_on error\n", __func__); goto uart_enable_failed; } /* iuu_reset() the card after iuu_uart_on() */ status = iuu_uart_flush(port); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_flush error\n", __func__); uart_enable_failed: kfree(buf); return status; } /* Disables the IUU UART (a.k.a. the Phoenix voiderface) */ static int iuu_uart_off(struct usb_serial_port *port) { int status; u8 *buf; buf = kmalloc(1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_UART_DISABLE; status = bulk_immediate(port, buf, 1); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_off error\n", __func__); kfree(buf); return status; } static int iuu_uart_baud(struct usb_serial_port *port, u32 baud_base, u32 *actual, u8 parity) { int status; u32 baud; u8 *dataout; u8 DataCount = 0; u8 T1Frekvens = 0; u8 T1reload = 0; unsigned int T1FrekvensHZ = 0; dev_dbg(&port->dev, "%s - enter baud_base=%d\n", __func__, baud_base); dataout = kmalloc(5, GFP_KERNEL); if (!dataout) return -ENOMEM; /*baud = (((priv->clk / 35) * baud_base) / 100000); */ baud = baud_base; if (baud < 1200 || baud > 230400) { kfree(dataout); return IUU_INVALID_PARAMETER; } if (baud > 977) { T1Frekvens = 3; T1FrekvensHZ = 500000; } if (baud > 3906) { T1Frekvens = 2; T1FrekvensHZ = 2000000; } if (baud > 11718) { T1Frekvens = 1; T1FrekvensHZ = 6000000; } if (baud > 46875) { T1Frekvens = 0; T1FrekvensHZ = 24000000; } T1reload = 256 - (u8) (T1FrekvensHZ / (baud * 2)); /* magic number here: ENTER_FIRMWARE_UPDATE; */ dataout[DataCount++] = IUU_UART_ESC; /* magic number here: CHANGE_BAUD; */ dataout[DataCount++] = IUU_UART_CHANGE; dataout[DataCount++] = T1Frekvens; dataout[DataCount++] = T1reload; *actual = (T1FrekvensHZ / (256 - T1reload)) / 2; switch (parity & 0x0F) { case IUU_PARITY_NONE: dataout[DataCount++] = 0x00; break; case IUU_PARITY_EVEN: dataout[DataCount++] = 0x01; break; case IUU_PARITY_ODD: dataout[DataCount++] = 0x02; break; case IUU_PARITY_MARK: dataout[DataCount++] = 0x03; break; case IUU_PARITY_SPACE: dataout[DataCount++] = 0x04; break; default: kfree(dataout); return IUU_INVALID_PARAMETER; } switch (parity & 0xF0) { case IUU_ONE_STOP_BIT: dataout[DataCount - 1] |= IUU_ONE_STOP_BIT; break; case IUU_TWO_STOP_BITS: dataout[DataCount - 1] |= IUU_TWO_STOP_BITS; break; default: kfree(dataout); return IUU_INVALID_PARAMETER; } status = bulk_immediate(port, dataout, DataCount); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_off error\n", __func__); kfree(dataout); return status; } static void iuu_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { const u32 supported_mask = CMSPAR|PARENB|PARODD; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned int cflag = tty->termios.c_cflag; int status; u32 actual; u32 parity; int csize = CS7; int baud; u32 newval = cflag & supported_mask; /* Just use the ospeed. ispeed should be the same. */ baud = tty->termios.c_ospeed; dev_dbg(&port->dev, "%s - enter c_ospeed or baud=%d\n", __func__, baud); /* compute the parity parameter */ parity = 0; if (cflag & CMSPAR) { /* Using mark space */ if (cflag & PARODD) parity |= IUU_PARITY_SPACE; else parity |= IUU_PARITY_MARK; } else if (!(cflag & PARENB)) { parity |= IUU_PARITY_NONE; csize = CS8; } else if (cflag & PARODD) parity |= IUU_PARITY_ODD; else parity |= IUU_PARITY_EVEN; parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT); /* set it */ status = iuu_uart_baud(port, baud * priv->boost / 100, &actual, parity); /* set the termios value to the real one, so the user now what has * changed. We support few fields so its easies to copy the old hw * settings back over and then adjust them */ if (old_termios) tty_termios_copy_hw(&tty->termios, old_termios); if (status != 0) /* Set failed - return old bits */ return; /* Re-encode speed, parity and csize */ tty_encode_baud_rate(tty, baud, baud); tty->termios.c_cflag &= ~(supported_mask|CSIZE); tty->termios.c_cflag |= newval | csize; } static void iuu_close(struct usb_serial_port *port) { /* iuu_led (port,255,0,0,0); */ iuu_uart_off(port); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); iuu_led(port, 0, 0, 0xF000, 0xFF); } static void iuu_init_termios(struct tty_struct *tty) { tty->termios.c_cflag = B9600 | CS8 | CSTOPB | CREAD | PARENB | CLOCAL; tty->termios.c_ispeed = 9600; tty->termios.c_ospeed = 9600; tty->termios.c_lflag = 0; tty->termios.c_oflag = 0; tty->termios.c_iflag = 0; } static int iuu_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct device *dev = &port->dev; int result; int baud; u32 actual; struct iuu_private *priv = usb_get_serial_port_data(port); baud = tty->termios.c_ospeed; dev_dbg(dev, "%s - baud %d\n", __func__, baud); usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); priv->poll = 0; #define SOUP(a, b, c, d) do { \ result = usb_control_msg(port->serial->dev, \ usb_sndctrlpipe(port->serial->dev, 0), \ b, a, c, d, NULL, 0, 1000); \ dev_dbg(dev, "0x%x:0x%x:0x%x:0x%x %d\n", a, b, c, d, result); } while (0) /* This is not UART related but IUU USB driver related or something */ /* like that. Basically no IUU will accept any commands from the USB */ /* host unless it has received the following message */ /* sprintf(buf ,"%c%c%c%c",0x03,0x02,0x02,0x0); */ SOUP(0x03, 0x02, 0x02, 0x0); iuu_led(port, 0xF000, 0xF000, 0, 0xFF); iuu_uart_on(port); if (boost < 100) boost = 100; priv->boost = boost; switch (clockmode) { case 2: /* 3.680 Mhz */ priv->clk = IUU_CLK_3680000; iuu_clk(port, IUU_CLK_3680000 * boost / 100); result = iuu_uart_baud(port, baud * boost / 100, &actual, IUU_PARITY_EVEN); break; case 3: /* 6.00 Mhz */ iuu_clk(port, IUU_CLK_6000000 * boost / 100); priv->clk = IUU_CLK_6000000; /* Ratio of 6000000 to 3500000 for baud 9600 */ result = iuu_uart_baud(port, 16457 * boost / 100, &actual, IUU_PARITY_EVEN); break; default: /* 3.579 Mhz */ iuu_clk(port, IUU_CLK_3579000 * boost / 100); priv->clk = IUU_CLK_3579000; result = iuu_uart_baud(port, baud * boost / 100, &actual, IUU_PARITY_EVEN); } /* set the cardin cardout signals */ switch (cdmode) { case 0: iuu_cardin = 0; iuu_cardout = 0; break; case 1: iuu_cardin = TIOCM_CD; iuu_cardout = 0; break; case 2: iuu_cardin = 0; iuu_cardout = TIOCM_CD; break; case 3: iuu_cardin = TIOCM_DSR; iuu_cardout = 0; break; case 4: iuu_cardin = 0; iuu_cardout = TIOCM_DSR; break; case 5: iuu_cardin = TIOCM_CTS; iuu_cardout = 0; break; case 6: iuu_cardin = 0; iuu_cardout = TIOCM_CTS; break; case 7: iuu_cardin = TIOCM_RNG; iuu_cardout = 0; break; case 8: iuu_cardin = 0; iuu_cardout = TIOCM_RNG; } iuu_uart_flush(port); dev_dbg(dev, "%s - initialization done\n", __func__); memset(port->write_urb->transfer_buffer, IUU_UART_RX, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, read_rxcmd_callback, port); result = usb_submit_urb(port->write_urb, GFP_KERNEL); if (result) { dev_err(dev, "%s - failed submitting read urb, error %d\n", __func__, result); iuu_close(port); } else { dev_dbg(dev, "%s - rxcmd OK\n", __func__); } return result; } /* how to change VCC */ static int iuu_vcc_set(struct usb_serial_port *port, unsigned int vcc) { int status; u8 *buf; buf = kmalloc(5, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_SET_VCC; buf[1] = vcc & 0xFF; buf[2] = (vcc >> 8) & 0xFF; buf[3] = (vcc >> 16) & 0xFF; buf[4] = (vcc >> 24) & 0xFF; status = bulk_immediate(port, buf, 5); kfree(buf); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - vcc error status = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - vcc OK !\n", __func__); return status; } /* * Sysfs Attributes */ static ssize_t vcc_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); struct iuu_private *priv = usb_get_serial_port_data(port); return sprintf(buf, "%d\n", priv->vcc); } static ssize_t vcc_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long v; if (kstrtoul(buf, 10, &v)) { dev_err(dev, "%s - vcc_mode: %s is not a unsigned long\n", __func__, buf); goto fail_store_vcc_mode; } dev_dbg(dev, "%s: setting vcc_mode = %ld\n", __func__, v); if ((v != 3) && (v != 5)) { dev_err(dev, "%s - vcc_mode %ld is invalid\n", __func__, v); } else { iuu_vcc_set(port, v); priv->vcc = v; } fail_store_vcc_mode: return count; } static DEVICE_ATTR_RW(vcc_mode); static int iuu_create_sysfs_attrs(struct usb_serial_port *port) { return device_create_file(&port->dev, &dev_attr_vcc_mode); } static int iuu_remove_sysfs_attrs(struct usb_serial_port *port) { device_remove_file(&port->dev, &dev_attr_vcc_mode); return 0; } /* * End Sysfs Attributes */ static struct usb_serial_driver iuu_device = { .driver = { .name = "iuu_phoenix", }, .id_table = id_table, .num_ports = 1, .num_bulk_in = 1, .num_bulk_out = 1, .bulk_in_size = 512, .bulk_out_size = 512, .open = iuu_open, .close = iuu_close, .write = iuu_uart_write, .read_bulk_callback = iuu_uart_read_callback, .tiocmget = iuu_tiocmget, .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; static struct usb_serial_driver * const serial_drivers[] = { &iuu_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR("Alain Degreffe eczema@ecze.com"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(xmas, bool, 0644); MODULE_PARM_DESC(xmas, "Xmas colors enabled or not"); module_param(boost, int, 0644); MODULE_PARM_DESC(boost, "Card overclock boost (in percent 100-500)"); module_param(clockmode, int, 0644); MODULE_PARM_DESC(clockmode, "Card clock mode (1=3.579 MHz, 2=3.680 MHz, " "3=6 Mhz)"); module_param(cdmode, int, 0644); MODULE_PARM_DESC(cdmode, "Card detect mode (0=none, 1=CD, 2=!CD, 3=DSR, " "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING)"); module_param(vcc_default, int, 0644); MODULE_PARM_DESC(vcc_default, "Set default VCC (either 3 for 3.3V or 5 " "for 5V). Default to 5.");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_EC_H #define _BCACHEFS_EC_H #include "ec_types.h" #include "buckets_types.h" #include "extents_types.h" int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ .min_val_size = 8, \ }) static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) { return DIV_ROUND_UP(le16_to_cpu(s->sectors), 1 << s->csum_granularity_bits); } static inline unsigned stripe_csum_offset(const struct bch_stripe *s, unsigned dev, unsigned csum_idx) { EBUG_ON(s->csum_type >= BCH_CSUM_NR); unsigned csum_bytes = bch_crc_bytes[s->csum_type]; return sizeof(struct bch_stripe) + sizeof(struct bch_extent_ptr) * s->nr_blocks + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; } static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, unsigned idx) { return stripe_csum_offset(s, s->nr_blocks, 0) + sizeof(u16) * idx; } static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, unsigned idx) { return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); } static inline void stripe_blockcount_set(struct bch_stripe *s, unsigned idx, unsigned v) { __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); *p = cpu_to_le16(v); } static inline unsigned stripe_val_u64s(const struct bch_stripe *s) { return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), sizeof(u64)); } static inline void *stripe_csum(struct bch_stripe *s, unsigned block, unsigned csum_idx) { EBUG_ON(block >= s->nr_blocks); EBUG_ON(csum_idx >= stripe_csums_per_device(s)); return (void *) s + stripe_csum_offset(s, block, csum_idx); } static inline struct bch_csum stripe_csum_get(struct bch_stripe *s, unsigned block, unsigned csum_idx) { struct bch_csum csum = { 0 }; memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]); return csum; } static inline void stripe_csum_set(struct bch_stripe *s, unsigned block, unsigned csum_idx, struct bch_csum csum) { memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]); } static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr, const struct bch_extent_ptr *data_ptr, unsigned sectors) { return (data_ptr->dev == stripe_ptr->dev || data_ptr->dev == BCH_SB_MEMBER_INVALID || stripe_ptr->dev == BCH_SB_MEMBER_INVALID) && data_ptr->gen == stripe_ptr->gen && data_ptr->offset >= stripe_ptr->offset && data_ptr->offset < stripe_ptr->offset + sectors; } static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, struct extent_ptr_decoded p) { unsigned nr_data = s->nr_blocks - s->nr_redundant; BUG_ON(!p.has_ec); if (p.ec.block >= nr_data) return false; return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr, le16_to_cpu(s->sectors)); } static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m, struct extent_ptr_decoded p) { unsigned nr_data = m->nr_blocks - m->nr_redundant; BUG_ON(!p.has_ec); if (p.ec.block >= nr_data) return false; return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr, m->sectors); } struct bch_read_bio; struct ec_stripe_buf { /* might not be buffering the entire stripe: */ unsigned offset; unsigned size; unsigned long valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; void *data[BCH_BKEY_PTRS_MAX]; __BKEY_PADDED(key, 255); }; struct ec_stripe_head; enum ec_stripe_ref { STRIPE_REF_io, STRIPE_REF_stripe, STRIPE_REF_NR }; struct ec_stripe_new { struct bch_fs *c; struct ec_stripe_head *h; struct mutex lock; struct list_head list; struct hlist_node hash; u64 idx; struct closure iodone; atomic_t ref[STRIPE_REF_NR]; int err; u8 nr_data; u8 nr_parity; bool allocated; bool pending; bool have_existing_stripe; unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX]; struct disk_reservation res; struct ec_stripe_buf new_stripe; struct ec_stripe_buf existing_stripe; }; struct ec_stripe_head { struct list_head list; struct mutex lock; unsigned disk_label; unsigned algo; unsigned redundancy; enum bch_watermark watermark; bool insufficient_devs; unsigned long rw_devs_change_count; u64 nr_created; struct bch_devs_mask devs; unsigned nr_active_devs; unsigned blocksize; struct dev_stripe_state block_stripe; struct dev_stripe_state parity_stripe; struct ec_stripe_new *s; }; int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *, struct bkey_s_c); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *, unsigned, unsigned, unsigned, enum bch_watermark, struct closure *); void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); void bch2_do_stripe_deletes(struct bch_fs *); void bch2_ec_do_stripe_creates(struct bch_fs *); void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *); static inline void ec_stripe_new_get(struct ec_stripe_new *s, enum ec_stripe_ref ref) { atomic_inc(&s->ref[ref]); } static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, enum ec_stripe_ref ref) { BUG_ON(atomic_read(&s->ref[ref]) <= 0); if (atomic_dec_and_test(&s->ref[ref])) switch (ref) { case STRIPE_REF_stripe: bch2_ec_stripe_new_free(c, s); break; case STRIPE_REF_io: bch2_ec_do_stripe_creates(c); break; default: BUG(); } } int bch2_dev_remove_stripes(struct bch_fs *, unsigned); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_fs_ec_stop(struct bch_fs *); void bch2_fs_ec_flush(struct bch_fs *); int bch2_stripes_read(struct bch_fs *); void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *); void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_init_early(struct bch_fs *); int bch2_fs_ec_init(struct bch_fs *); #endif /* _BCACHEFS_EC_H */
207 1291 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include <linux/types.h> #include <linux/percpu_counter.h> #include <linux/cache.h> struct dst_entry; struct kmem_cachep; struct net_device; struct sk_buff; struct sock; struct net; struct dst_ops { unsigned short family; unsigned int gc_thresh; void (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*mtu)(const struct dst_entry *); u32 * (*cow_metrics)(struct dst_entry *, unsigned long); void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev); void (*negative_advice)(struct sock *sk, struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); void (*confirm_neigh)(const struct dst_entry *dst, const void *daddr); struct kmem_cache *kmem_cachep; struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; }; static inline int dst_entries_get_fast(struct dst_ops *dst) { return percpu_counter_read_positive(&dst->pcpuc_entries); } static inline int dst_entries_get_slow(struct dst_ops *dst) { return percpu_counter_sum_positive(&dst->pcpuc_entries); } #define DST_PERCPU_COUNTER_BATCH 32 static inline void dst_entries_add(struct dst_ops *dst, int val) { percpu_counter_add_batch(&dst->pcpuc_entries, val, DST_PERCPU_COUNTER_BATCH); } static inline int dst_entries_init(struct dst_ops *dst) { return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) { percpu_counter_destroy(&dst->pcpuc_entries); } #endif
67 67 66 67 98 99 99 99 99 64 64 64 64 4 4 28 28 67 67 67 67 64 64 4 67 3 99 99 8 95 99 64 56 51 64 64 28 28 28 67 67 67 43 43 9 37 43 2 16 16 31 31 43 43 43 43 43 43 2 2 2 2 2 2 16 16 16 27 27 27 82 112 127 40 87 43 44 44 93 34 28 28 2 2 1 28 143 143 6 142 28 142 4 143 87 72 73 72 73 72 72 58 72 14 58 87 28 6 28 86 86 23 81 102 90 90 14 90 90 82 82 19 1 82 18 81 21 81 81 77 77 4 77 17 77 5 68 68 11 82 80 82 77 19 77 82 77 82 2 44 44 42 4 41 7 7 7 41 9 41 1 1 1 1 1 1 41 37 14 37 44 41 41 44 9 44 24 23 24 23 23 23 23 23 23 23 16 3 16 15 15 13 13 13 13 12 10 12 16 3 10 10 10 10 9 8 6 8 6 8 8 7 7 7 6 2 2 1 1 7 6 6 5 2 2 1 1 8 8 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, OGAWA Hirofumi */ #include <linux/blkdev.h> #include <linux/sched/signal.h> #include <linux/backing-dev-defs.h> #include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); void (*ent_set_ptr)(struct fat_entry *, int); int (*ent_bread)(struct super_block *, struct fat_entry *, int, sector_t); int (*ent_get)(struct fat_entry *); void (*ent_put)(struct fat_entry *, int); int (*ent_next)(struct fat_entry *); }; static DEFINE_SPINLOCK(fat12_entry_lock); static void fat12_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = entry + (entry >> 1); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = (entry << sbi->fatent_shift); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat12_ent_set_ptr(struct fat_entry *fatent, int offset) { struct buffer_head **bhs = fatent->bhs; if (fatent->nr_bhs == 1) { WARN_ON(offset >= (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[0]->b_data + (offset + 1); } else { WARN_ON(offset != (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[1]->b_data; } } static void fat16_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (2 - 1)); fatent->u.ent16_p = (__le16 *)(fatent->bhs[0]->b_data + offset); } static void fat32_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (4 - 1)); fatent->u.ent32_p = (__le32 *)(fatent->bhs[0]->b_data + offset); } static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct buffer_head **bhs = fatent->bhs; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; bhs[0] = sb_bread(sb, blocknr); if (!bhs[0]) goto err; if ((offset + 1) < sb->s_blocksize) fatent->nr_bhs = 1; else { /* This entry is block boundary, it needs the next block */ blocknr++; bhs[1] = sb_bread(sb, blocknr); if (!bhs[1]) goto err_brelse; fatent->nr_bhs = 2; } fat12_ent_set_ptr(fatent, offset); return 0; err_brelse: brelse(bhs[0]); err: fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; ops->ent_set_ptr(fatent, offset); return 0; } static int fat12_ent_get(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; int next; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); else next = (*ent12_p[1] << 8) | *ent12_p[0]; spin_unlock(&fat12_entry_lock); next &= 0x0fff; if (next >= BAD_FAT12) next = FAT_ENT_EOF; return next; } static int fat16_ent_get(struct fat_entry *fatent) { int next = le16_to_cpu(*fatent->u.ent16_p); WARN_ON((unsigned long)fatent->u.ent16_p & (2 - 1)); if (next >= BAD_FAT16) next = FAT_ENT_EOF; return next; } static int fat32_ent_get(struct fat_entry *fatent) { int next = le32_to_cpu(*fatent->u.ent32_p) & 0x0fffffff; WARN_ON((unsigned long)fatent->u.ent32_p & (4 - 1)); if (next >= BAD_FAT32) next = FAT_ENT_EOF; return next; } static void fat12_ent_put(struct fat_entry *fatent, int new) { u8 **ent12_p = fatent->u.ent12_p; if (new == FAT_ENT_EOF) new = EOF_FAT12; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) { *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); *ent12_p[1] = new >> 4; } else { *ent12_p[0] = new & 0xff; *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); } spin_unlock(&fat12_entry_lock); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); if (fatent->nr_bhs == 2) mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); } static void fat16_ent_put(struct fat_entry *fatent, int new) { if (new == FAT_ENT_EOF) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static void fat32_ent_put(struct fat_entry *fatent, int new) { WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static int fat12_ent_next(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; struct buffer_head **bhs = fatent->bhs; u8 *nextp = ent12_p[1] + 1 + (fatent->entry & 1); fatent->entry++; if (fatent->nr_bhs == 1) { WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { ent12_p[0] = nextp - 1; ent12_p[1] = nextp; return 1; } } else { WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); ent12_p[0] = nextp - 1; ent12_p[1] = nextp; brelse(bhs[0]); bhs[0] = bhs[1]; fatent->nr_bhs = 1; return 1; } ent12_p[0] = NULL; ent12_p[1] = NULL; return 0; } static int fat16_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent16_p < (__le16 *)(bh->b_data + (bh->b_size - 2))) { fatent->u.ent16_p++; return 1; } fatent->u.ent16_p = NULL; return 0; } static int fat32_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent32_p < (__le32 *)(bh->b_data + (bh->b_size - 4))) { fatent->u.ent32_p++; return 1; } fatent->u.ent32_p = NULL; return 0; } static const struct fatent_operations fat12_ops = { .ent_blocknr = fat12_ent_blocknr, .ent_set_ptr = fat12_ent_set_ptr, .ent_bread = fat12_ent_bread, .ent_get = fat12_ent_get, .ent_put = fat12_ent_put, .ent_next = fat12_ent_next, }; static const struct fatent_operations fat16_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat16_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat16_ent_get, .ent_put = fat16_ent_put, .ent_next = fat16_ent_next, }; static const struct fatent_operations fat32_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat32_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat32_ent_get, .ent_put = fat32_ent_put, .ent_next = fat32_ent_next, }; static inline void lock_fat(struct msdos_sb_info *sbi) { mutex_lock(&sbi->fat_lock); } static inline void unlock_fat(struct msdos_sb_info *sbi) { mutex_unlock(&sbi->fat_lock); } void fat_ent_access_init(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); mutex_init(&sbi->fat_lock); if (is_fat32(sbi)) { sbi->fatent_shift = 2; sbi->fatent_ops = &fat32_ops; } else if (is_fat16(sbi)) { sbi->fatent_shift = 1; sbi->fatent_ops = &fat16_ops; } else if (is_fat12(sbi)) { sbi->fatent_shift = -1; sbi->fatent_ops = &fat12_ops; } else { fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits); } } static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); if (sb_rdonly(sb) || !is_fat32(sbi)) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); } static inline int fat_ent_update_ptr(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct buffer_head **bhs = fatent->bhs; /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; if (is_fat12(sbi)) { if ((offset + 1) < sb->s_blocksize) { /* This entry is on bhs[0]. */ if (fatent->nr_bhs == 2) { brelse(bhs[1]); fatent->nr_bhs = 1; } } else { /* This entry needs the next block. */ if (fatent->nr_bhs != 2) return 0; if (bhs[1]->b_blocknr != (blocknr + 1)) return 0; } } ops->ent_set_ptr(fatent, offset); return 1; } int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); const struct fatent_operations *ops = sbi->fatent_ops; int err, offset; sector_t blocknr; if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } fatent_set_entry(fatent, entry); ops->ent_blocknr(sb, entry, &offset, &blocknr); if (!fat_ent_update_ptr(sb, fatent, offset, blocknr)) { fatent_brelse(fatent); err = ops->ent_bread(sb, fatent, offset, blocknr); if (err) return err; } return ops->ent_get(fatent); } /* FIXME: We can write the blocks as more big chunk. */ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, int nr_bhs) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *c_bh; int err, n, copy; err = 0; for (copy = 1; copy < sbi->fats; copy++) { sector_t backup_fat = sbi->fat_length * copy; for (n = 0; n < nr_bhs; n++) { c_bh = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr); if (!c_bh) { err = -ENOMEM; goto error; } /* Avoid race with userspace read via bdev */ lock_buffer(c_bh); memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); unlock_buffer(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); if (err) goto error; } } error: return err; } int fat_ent_write(struct inode *inode, struct fat_entry *fatent, int new, int wait) { struct super_block *sb = inode->i_sb; const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; int err; ops->ent_put(fatent, new); if (wait) { err = fat_sync_bhs(fatent->bhs, fatent->nr_bhs); if (err) return err; } return fat_mirror_bhs(sb, fatent->bhs, fatent->nr_bhs); } static inline int fat_ent_next(struct msdos_sb_info *sbi, struct fat_entry *fatent) { if (sbi->fatent_ops->ent_next(fatent)) { if (fatent->entry < sbi->max_cluster) return 1; } return 0; } static inline int fat_ent_read_block(struct super_block *sb, struct fat_entry *fatent) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; sector_t blocknr; int offset; fatent_brelse(fatent); ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); return ops->ent_bread(sb, fatent, offset, blocknr); } static void fat_collect_bhs(struct buffer_head **bhs, int *nr_bhs, struct fat_entry *fatent) { int n, i; for (n = 0; n < fatent->nr_bhs; n++) { for (i = 0; i < *nr_bhs; i++) { if (fatent->bhs[n] == bhs[i]) break; } if (i == *nr_bhs) { get_bh(fatent->bhs[n]); bhs[i] = fatent->bhs[n]; (*nr_bhs)++; } } } int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent, prev_ent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, count, err, nr_bhs, idx_clus; BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */ lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid && sbi->free_clusters < nr_cluster) { unlock_fat(sbi); return -ENOSPC; } err = nr_bhs = idx_clus = 0; count = FAT_START_ENT; fatent_init(&prev_ent); fatent_init(&fatent); fatent_set_entry(&fatent, sbi->prev_free + 1); while (count < sbi->max_cluster) { if (fatent.entry >= sbi->max_cluster) fatent.entry = FAT_START_ENT; fatent_set_entry(&fatent, fatent.entry); err = fat_ent_read_block(sb, &fatent); if (err) goto out; /* Find the free entries in a block */ do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { int entry = fatent.entry; /* make the cluster chain */ ops->ent_put(&fatent, FAT_ENT_EOF); if (prev_ent.nr_bhs) ops->ent_put(&prev_ent, entry); fat_collect_bhs(bhs, &nr_bhs, &fatent); sbi->prev_free = entry; if (sbi->free_clusters != -1) sbi->free_clusters--; cluster[idx_clus] = entry; idx_clus++; if (idx_clus == nr_cluster) goto out; /* * fat_collect_bhs() gets ref-count of bhs, * so we can still use the prev_ent. */ prev_ent = fatent; } count++; if (count == sbi->max_cluster) break; } while (fat_ent_next(sbi, &fatent)); } /* Couldn't allocate the free entries */ sbi->free_clusters = 0; sbi->free_clus_valid = 1; err = -ENOSPC; out: unlock_fat(sbi); mark_fsinfo_dirty(sb); fatent_brelse(&fatent); if (!err) { if (inode_needs_sync(inode)) err = fat_sync_bhs(bhs, nr_bhs); if (!err) err = fat_mirror_bhs(sb, bhs, nr_bhs); } for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); if (err && idx_clus) fat_free_clusters(inode, cluster[0]); return err; } int fat_free_clusters(struct inode *inode, int cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, err, nr_bhs; int first_cl = cluster, dirty_fsinfo = 0; nr_bhs = 0; fatent_init(&fatent); lock_fat(sbi); do { cluster = fat_ent_read(inode, &fatent, cluster); if (cluster < 0) { err = cluster; goto error; } else if (cluster == FAT_ENT_FREE) { fat_fs_error(sb, "%s: deleting FAT entry beyond EOF", __func__); err = -EIO; goto error; } if (sbi->options.discard) { /* * Issue discard for the sectors we no longer * care about, batching contiguous clusters * into one request */ if (cluster != fatent.entry + 1) { int nr_clus = fatent.entry - first_cl + 1; sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); first_cl = cluster; } } ops->ent_put(&fatent, FAT_ENT_FREE); if (sbi->free_clusters != -1) { sbi->free_clusters++; dirty_fsinfo = 1; } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); if (err) goto error; for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); nr_bhs = 0; } fat_collect_bhs(bhs, &nr_bhs, &fatent); } while (cluster != FAT_ENT_EOF); if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); error: fatent_brelse(&fatent); for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); unlock_fat(sbi); if (dirty_fsinfo) mark_fsinfo_dirty(sb); return err; } EXPORT_SYMBOL_GPL(fat_free_clusters); struct fatent_ra { sector_t cur; sector_t limit; unsigned int ra_blocks; sector_t ra_advance; sector_t ra_next; sector_t ra_limit; }; static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent, int ent_limit) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; sector_t blocknr, block_end; int offset; /* * This is the sequential read, so ra_pages * 2 (but try to * align the optimal hardware IO size). * [BTW, 128kb covers the whole sectors for FAT12 and FAT16] */ unsigned long ra_pages = sb->s_bdi->ra_pages; unsigned int reada_blocks; if (fatent->entry >= ent_limit) return; if (ra_pages > sb->s_bdi->io_pages) ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages); reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1); /* Initialize the range for sequential read */ ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); ops->ent_blocknr(sb, ent_limit - 1, &offset, &block_end); ra->cur = 0; ra->limit = (block_end + 1) - blocknr; /* Advancing the window at half size */ ra->ra_blocks = reada_blocks >> 1; ra->ra_advance = ra->cur; ra->ra_next = ra->cur; ra->ra_limit = ra->cur + min_t(sector_t, reada_blocks, ra->limit); } /* Assuming to be called before reading a new block (increments ->cur). */ static void fat_ent_reada(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent) { if (ra->ra_next >= ra->ra_limit) return; if (ra->cur >= ra->ra_advance) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct blk_plug plug; sector_t blocknr, diff; int offset; ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); diff = blocknr - ra->cur; blk_start_plug(&plug); /* * FIXME: we would want to directly use the bio with * pages to reduce the number of segments. */ for (; ra->ra_next < ra->ra_limit; ra->ra_next++) sb_breadahead(sb, ra->ra_next + diff); blk_finish_plug(&plug); /* Advance the readahead window */ ra->ra_advance += ra->ra_blocks; ra->ra_limit += min_t(sector_t, ra->ra_blocks, ra->limit - ra->ra_limit); } ra->cur++; } int fat_count_free_clusters(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; int err = 0, free; lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid) goto out; free = 0; fatent_init(&fatent); fatent_set_entry(&fatent, FAT_START_ENT); fat_ra_init(sb, &fatent_ra, &fatent, sbi->max_cluster); while (fatent.entry < sbi->max_cluster) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto out; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; mark_fsinfo_dirty(sb); fatent_brelse(&fatent); out: unlock_fat(sbi); return err; } static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus) { struct msdos_sb_info *sbi = MSDOS_SB(sb); return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); } int fat_trim_fs(struct inode *inode, struct fstrim_range *range) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; u64 ent_start, ent_end, minlen, trimmed = 0; u32 free = 0; int err = 0; /* * FAT data is organized as clusters, trim at the granulary of cluster. * * fstrim_range is in byte, convert values to cluster index. * Treat sectors before data region as all used, not to trim them. */ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT); ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1; minlen = range->minlen >> sbi->cluster_bits; if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size) return -EINVAL; if (ent_end >= sbi->max_cluster) ent_end = sbi->max_cluster - 1; fatent_init(&fatent); lock_fat(sbi); fatent_set_entry(&fatent, ent_start); fat_ra_init(sb, &fatent_ra, &fatent, ent_end + 1); while (fatent.entry <= ent_end) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto error; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { free++; } else if (free) { if (free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } free = 0; } } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto error; } if (need_resched()) { fatent_brelse(&fatent); unlock_fat(sbi); cond_resched(); lock_fat(sbi); } } /* handle scenario when tail entries are all free */ if (free && free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } error: fatent_brelse(&fatent); unlock_fat(sbi); range->len = trimmed << sbi->cluster_bits; return err; }
7 7 7 7 10 10 10 10 11 10 10 10 8 8 8 8 8 8 5 5 5 5 2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/module.h> #include <linux/backing-dev.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/smp.h> #include "blk.h" #include "blk-mq.h" static void blk_mq_sysfs_release(struct kobject *kobj) { struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj); free_percpu(ctxs->queue_ctx); kfree(ctxs); } static void blk_mq_ctx_sysfs_release(struct kobject *kobj) { struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj); /* ctx->ctxs won't be released until all ctx are freed */ kobject_put(&ctx->ctxs->kobj); } static void blk_mq_hw_sysfs_release(struct kobject *kobj) { struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); blk_free_flush_queue(hctx->fq); sbitmap_free(&hctx->ctx_map); free_cpumask_var(hctx->cpumask); kfree(hctx->ctxs); kfree(hctx); } struct blk_mq_hw_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_hw_ctx *, char *); }; static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj, struct attribute *attr, char *page) { struct blk_mq_hw_ctx_sysfs_entry *entry; struct blk_mq_hw_ctx *hctx; struct request_queue *q; ssize_t res; entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr); hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); q = hctx->queue; if (!entry->show) return -EIO; mutex_lock(&q->sysfs_lock); res = entry->show(hctx, page); mutex_unlock(&q->sysfs_lock); return res; } static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx, char *page) { return sprintf(page, "%u\n", hctx->tags->nr_tags); } static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, char *page) { return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags); } static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) { const size_t size = PAGE_SIZE - 1; unsigned int i, first = 1; int ret = 0, pos = 0; for_each_cpu(i, hctx->cpumask) { if (first) ret = snprintf(pos + page, size - pos, "%u", i); else ret = snprintf(pos + page, size - pos, ", %u", i); if (ret >= size - pos) break; first = 0; pos += ret; } ret = snprintf(pos + page, size + 1 - pos, "\n"); return pos + ret; } static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { .attr = {.name = "nr_tags", .mode = 0444 }, .show = blk_mq_hw_sysfs_nr_tags_show, }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = { .attr = {.name = "nr_reserved_tags", .mode = 0444 }, .show = blk_mq_hw_sysfs_nr_reserved_tags_show, }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { .attr = {.name = "cpu_list", .mode = 0444 }, .show = blk_mq_hw_sysfs_cpus_show, }; static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_nr_tags.attr, &blk_mq_hw_sysfs_nr_reserved_tags.attr, &blk_mq_hw_sysfs_cpus.attr, NULL, }; ATTRIBUTE_GROUPS(default_hw_ctx); static const struct sysfs_ops blk_mq_hw_sysfs_ops = { .show = blk_mq_hw_sysfs_show, }; static const struct kobj_type blk_mq_ktype = { .release = blk_mq_sysfs_release, }; static const struct kobj_type blk_mq_ctx_ktype = { .release = blk_mq_ctx_sysfs_release, }; static const struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_groups = default_hw_ctx_groups, .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; int i; if (!hctx->nr_ctx) return; hctx_for_each_ctx(hctx, ctx, i) kobject_del(&ctx->kobj); kobject_del(&hctx->kobj); } static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct blk_mq_ctx *ctx; int i, j, ret; if (!hctx->nr_ctx) return 0; ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num); if (ret) return ret; hctx_for_each_ctx(hctx, ctx, i) { ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); if (ret) goto out; } return 0; out: hctx_for_each_ctx(hctx, ctx, j) { if (j < i) kobject_del(&ctx->kobj); } kobject_del(&hctx->kobj); return ret; } void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) { kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } void blk_mq_sysfs_deinit(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; for_each_possible_cpu(cpu) { ctx = per_cpu_ptr(q->queue_ctx, cpu); kobject_put(&ctx->kobj); } kobject_put(q->mq_kobj); } void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; kobject_init(q->mq_kobj, &blk_mq_ktype); for_each_possible_cpu(cpu) { ctx = per_cpu_ptr(q->queue_ctx, cpu); kobject_get(q->mq_kobj); kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); } } int blk_mq_sysfs_register(struct gendisk *disk) { struct request_queue *q = disk->queue; struct blk_mq_hw_ctx *hctx; unsigned long i, j; int ret; ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) return ret; kobject_uevent(q->mq_kobj, KOBJ_ADD); mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) goto out_unreg; } mutex_unlock(&q->tag_set->tag_list_lock); return 0; out_unreg: queue_for_each_hw_ctx(q, hctx, j) { if (j < i) blk_mq_unregister_hctx(hctx); } mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); return ret; } void blk_mq_sysfs_unregister(struct gendisk *disk) { struct request_queue *q = disk->queue; struct blk_mq_hw_ctx *hctx; unsigned long i; mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); } void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; if (!blk_queue_registered(q)) return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; int ret = 0; if (!blk_queue_registered(q)) goto out; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) break; } out: return ret; }
4 4 4 1 1 1 4 4 3 2 2 2 2 1 1 1 1 1 1 1 2 4 2 2 1 1 2 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 /* BNEP implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2001-2002 Inventel Systemes Written 2001-2002 by Clément Moreau <clement.moreau@inventel.fr> David Libault <david.libault@inventel.fr> Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #include <linux/module.h> #include <linux/kthread.h> #include <linux/file.h> #include <linux/etherdevice.h> #include <linux/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/hci_core.h> #include "bnep.h" #define VERSION "1.3" static bool compress_src = true; static bool compress_dst = true; static LIST_HEAD(bnep_session_list); static DECLARE_RWSEM(bnep_session_sem); static struct bnep_session *__bnep_get_session(u8 *dst) { struct bnep_session *s; BT_DBG(""); list_for_each_entry(s, &bnep_session_list, list) if (ether_addr_equal(dst, s->eh.h_source)) return s; return NULL; } static void __bnep_link_session(struct bnep_session *s) { list_add(&s->list, &bnep_session_list); } static void __bnep_unlink_session(struct bnep_session *s) { list_del(&s->list); } static int bnep_send(struct bnep_session *s, void *data, size_t len) { struct socket *sock = s->sock; struct kvec iv = { data, len }; return kernel_sendmsg(sock, &s->msg, &iv, 1, len); } static int bnep_send_rsp(struct bnep_session *s, u8 ctrl, u16 resp) { struct bnep_control_rsp rsp; rsp.type = BNEP_CONTROL; rsp.ctrl = ctrl; rsp.resp = htons(resp); return bnep_send(s, &rsp, sizeof(rsp)); } #ifdef CONFIG_BT_BNEP_PROTO_FILTER static inline void bnep_set_default_proto_filter(struct bnep_session *s) { /* (IPv4, ARP) */ s->proto_filter[0].start = ETH_P_IP; s->proto_filter[0].end = ETH_P_ARP; /* (RARP, AppleTalk) */ s->proto_filter[1].start = ETH_P_RARP; s->proto_filter[1].end = ETH_P_AARP; /* (IPX, IPv6) */ s->proto_filter[2].start = ETH_P_IPX; s->proto_filter[2].end = ETH_P_IPV6; } #endif static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len) { int n; if (len < 2) return -EILSEQ; n = get_unaligned_be16(data); data++; len -= 2; if (len < n) return -EILSEQ; BT_DBG("filter len %d", n); #ifdef CONFIG_BT_BNEP_PROTO_FILTER n /= 4; if (n <= BNEP_MAX_PROTO_FILTERS) { struct bnep_proto_filter *f = s->proto_filter; int i; for (i = 0; i < n; i++) { f[i].start = get_unaligned_be16(data++); f[i].end = get_unaligned_be16(data++); BT_DBG("proto filter start %u end %u", f[i].start, f[i].end); } if (i < BNEP_MAX_PROTO_FILTERS) memset(f + i, 0, sizeof(*f)); if (n == 0) bnep_set_default_proto_filter(s); bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_SUCCESS); } else { bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_LIMIT_REACHED); } #else bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_UNSUPPORTED_REQ); #endif return 0; } static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len) { int n; if (len < 2) return -EILSEQ; n = get_unaligned_be16(data); data += 2; len -= 2; if (len < n) return -EILSEQ; BT_DBG("filter len %d", n); #ifdef CONFIG_BT_BNEP_MC_FILTER n /= (ETH_ALEN * 2); if (n > 0) { int i; s->mc_filter = 0; /* Always send broadcast */ set_bit(bnep_mc_hash(s->dev->broadcast), (ulong *) &s->mc_filter); /* Add address ranges to the multicast hash */ for (; n > 0; n--) { u8 a1[6], *a2; memcpy(a1, data, ETH_ALEN); data += ETH_ALEN; a2 = data; data += ETH_ALEN; BT_DBG("mc filter %pMR -> %pMR", a1, a2); /* Iterate from a1 to a2 */ set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); while (memcmp(a1, a2, 6) < 0 && s->mc_filter != ~0LL) { /* Increment a1 */ i = 5; while (i >= 0 && ++a1[i--] == 0) ; set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); } } } BT_DBG("mc filter hash 0x%llx", s->mc_filter); bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_SUCCESS); #else bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_FILTER_UNSUPPORTED_REQ); #endif return 0; } static int bnep_rx_control(struct bnep_session *s, void *data, int len) { u8 cmd = *(u8 *)data; int err = 0; data++; len--; switch (cmd) { case BNEP_CMD_NOT_UNDERSTOOD: case BNEP_SETUP_CONN_RSP: case BNEP_FILTER_NET_TYPE_RSP: case BNEP_FILTER_MULTI_ADDR_RSP: /* Ignore these for now */ break; case BNEP_FILTER_NET_TYPE_SET: err = bnep_ctrl_set_netfilter(s, data, len); break; case BNEP_FILTER_MULTI_ADDR_SET: err = bnep_ctrl_set_mcfilter(s, data, len); break; case BNEP_SETUP_CONN_REQ: /* Successful response should be sent only once */ if (test_bit(BNEP_SETUP_RESPONSE, &s->flags) && !test_and_set_bit(BNEP_SETUP_RSP_SENT, &s->flags)) err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_SUCCESS); else err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_CONN_NOT_ALLOWED); break; default: { u8 pkt[3]; pkt[0] = BNEP_CONTROL; pkt[1] = BNEP_CMD_NOT_UNDERSTOOD; pkt[2] = cmd; err = bnep_send(s, pkt, sizeof(pkt)); } break; } return err; } static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb) { struct bnep_ext_hdr *h; int err = 0; do { h = (void *) skb->data; if (!skb_pull(skb, sizeof(*h))) { err = -EILSEQ; break; } BT_DBG("type 0x%x len %u", h->type, h->len); switch (h->type & BNEP_TYPE_MASK) { case BNEP_EXT_CONTROL: bnep_rx_control(s, skb->data, skb->len); break; default: /* Unknown extension, skip it. */ break; } if (!skb_pull(skb, h->len)) { err = -EILSEQ; break; } } while (!err && (h->type & BNEP_EXT_HEADER)); return err; } static u8 __bnep_rx_hlen[] = { ETH_HLEN, /* BNEP_GENERAL */ 0, /* BNEP_CONTROL */ 2, /* BNEP_COMPRESSED */ ETH_ALEN + 2, /* BNEP_COMPRESSED_SRC_ONLY */ ETH_ALEN + 2 /* BNEP_COMPRESSED_DST_ONLY */ }; static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) { struct net_device *dev = s->dev; struct sk_buff *nskb; u8 type, ctrl_type; dev->stats.rx_bytes += skb->len; type = *(u8 *) skb->data; skb_pull(skb, 1); ctrl_type = *(u8 *)skb->data; if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen)) goto badframe; if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) { if (bnep_rx_control(s, skb->data, skb->len) < 0) { dev->stats.tx_errors++; kfree_skb(skb); return 0; } if (!(type & BNEP_EXT_HEADER)) { kfree_skb(skb); return 0; } /* Verify and pull ctrl message since it's already processed */ switch (ctrl_type) { case BNEP_SETUP_CONN_REQ: /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */ if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2)) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: case BNEP_FILTER_NET_TYPE_SET: /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) goto badframe; break; default: kfree_skb(skb); return 0; } } else { skb_reset_mac_header(skb); /* Verify and pull out header */ if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) goto badframe; s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); } if (type & BNEP_EXT_HEADER) { if (bnep_rx_extension(s, skb) < 0) goto badframe; } /* Strip 802.1p header */ if (ntohs(s->eh.h_proto) == ETH_P_8021Q) { if (!skb_pull(skb, 4)) goto badframe; s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); } /* We have to alloc new skb and copy data here :(. Because original skb * may not be modified and because of the alignment requirements. */ nskb = alloc_skb(2 + ETH_HLEN + skb->len, GFP_KERNEL); if (!nskb) { dev->stats.rx_dropped++; kfree_skb(skb); return -ENOMEM; } skb_reserve(nskb, 2); /* Decompress header and construct ether frame */ switch (type & BNEP_TYPE_MASK) { case BNEP_COMPRESSED: __skb_put_data(nskb, &s->eh, ETH_HLEN); break; case BNEP_COMPRESSED_SRC_ONLY: __skb_put_data(nskb, s->eh.h_dest, ETH_ALEN); __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN); put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; case BNEP_COMPRESSED_DST_ONLY: __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN); __skb_put_data(nskb, s->eh.h_source, ETH_ALEN); put_unaligned(s->eh.h_proto, (__be16 *)__skb_put(nskb, 2)); break; case BNEP_GENERAL: __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN * 2); put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; } skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len); kfree_skb(skb); dev->stats.rx_packets++; nskb->ip_summed = CHECKSUM_NONE; nskb->protocol = eth_type_trans(nskb, dev); netif_rx(nskb); return 0; badframe: dev->stats.rx_errors++; kfree_skb(skb); return 0; } static u8 __bnep_tx_types[] = { BNEP_GENERAL, BNEP_COMPRESSED_SRC_ONLY, BNEP_COMPRESSED_DST_ONLY, BNEP_COMPRESSED }; static int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb) { struct ethhdr *eh = (void *) skb->data; struct socket *sock = s->sock; struct kvec iv[3]; int len = 0, il = 0; u8 type = 0; BT_DBG("skb %p dev %p type %u", skb, skb->dev, skb->pkt_type); if (!skb->dev) { /* Control frame sent by us */ goto send; } iv[il++] = (struct kvec) { &type, 1 }; len++; if (compress_src && ether_addr_equal(eh->h_dest, s->eh.h_source)) type |= 0x01; if (compress_dst && ether_addr_equal(eh->h_source, s->eh.h_dest)) type |= 0x02; if (type) skb_pull(skb, ETH_ALEN * 2); type = __bnep_tx_types[type]; switch (type) { case BNEP_COMPRESSED_SRC_ONLY: iv[il++] = (struct kvec) { eh->h_source, ETH_ALEN }; len += ETH_ALEN; break; case BNEP_COMPRESSED_DST_ONLY: iv[il++] = (struct kvec) { eh->h_dest, ETH_ALEN }; len += ETH_ALEN; break; } send: iv[il++] = (struct kvec) { skb->data, skb->len }; len += skb->len; /* FIXME: linearize skb */ { len = kernel_sendmsg(sock, &s->msg, iv, il, len); } kfree_skb(skb); if (len > 0) { s->dev->stats.tx_bytes += len; s->dev->stats.tx_packets++; return 0; } return len; } static int bnep_session(void *arg) { struct bnep_session *s = arg; struct net_device *dev = s->dev; struct sock *sk = s->sock->sk; struct sk_buff *skb; DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG(""); set_user_nice(current, -15); add_wait_queue(sk_sleep(sk), &wait); while (1) { if (atomic_read(&s->terminate)) break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) bnep_rx_frame(s, skb); else kfree_skb(skb); } if (sk->sk_state != BT_CONNECTED) break; /* TX */ while ((skb = skb_dequeue(&sk->sk_write_queue))) if (bnep_tx_frame(s, skb)) break; netif_wake_queue(dev); /* * wait_woken() performs the necessary memory barriers * for us; see the header comment for this primitive. */ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); /* Delete network device */ unregister_netdev(dev); /* Wakeup user-space polling for socket errors */ s->sock->sk->sk_err = EUNATCH; wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); __bnep_unlink_session(s); up_write(&bnep_session_sem); free_netdev(dev); module_put_and_kthread_exit(0); return 0; } static struct device *bnep_get_device(struct bnep_session *session) { struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; if (!conn || !conn->hcon) return NULL; return &conn->hcon->dev; } static const struct device_type bnep_type = { .name = "bluetooth", }; int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) { u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); struct net_device *dev; struct bnep_session *s, *ss; u8 dst[ETH_ALEN], src[ETH_ALEN]; int err; BT_DBG(""); if (!l2cap_is_socket(sock)) return -EBADFD; if (req->flags & ~valid_flags) return -EINVAL; baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst); baswap((void *) src, &l2cap_pi(sock->sk)->chan->src); /* session struct allocated as private part of net_device */ dev = alloc_netdev(sizeof(struct bnep_session), (*req->device) ? req->device : "bnep%d", NET_NAME_UNKNOWN, bnep_net_setup); if (!dev) return -ENOMEM; down_write(&bnep_session_sem); ss = __bnep_get_session(dst); if (ss && ss->state == BT_CONNECTED) { err = -EEXIST; goto failed; } s = netdev_priv(dev); /* This is rx header therefore addresses are swapped. * ie. eh.h_dest is our local address. */ memcpy(s->eh.h_dest, &src, ETH_ALEN); memcpy(s->eh.h_source, &dst, ETH_ALEN); eth_hw_addr_set(dev, s->eh.h_dest); s->dev = dev; s->sock = sock; s->role = req->role; s->state = BT_CONNECTED; s->flags = req->flags; s->msg.msg_flags = MSG_NOSIGNAL; #ifdef CONFIG_BT_BNEP_MC_FILTER /* Set default mc filter to not filter out any mc addresses * as defined in the BNEP specification (revision 0.95a) * http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf */ s->mc_filter = ~0LL; #endif #ifdef CONFIG_BT_BNEP_PROTO_FILTER /* Set default protocol filter */ bnep_set_default_proto_filter(s); #endif SET_NETDEV_DEV(dev, bnep_get_device(s)); SET_NETDEV_DEVTYPE(dev, &bnep_type); err = register_netdev(dev); if (err) goto failed; __bnep_link_session(s); __module_get(THIS_MODULE); s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name); if (IS_ERR(s->task)) { /* Session thread start failed, gotta cleanup. */ module_put(THIS_MODULE); unregister_netdev(dev); __bnep_unlink_session(s); err = PTR_ERR(s->task); goto failed; } up_write(&bnep_session_sem); strcpy(req->device, dev->name); return 0; failed: up_write(&bnep_session_sem); free_netdev(dev); return err; } int bnep_del_connection(struct bnep_conndel_req *req) { u32 valid_flags = 0; struct bnep_session *s; int err = 0; BT_DBG(""); if (req->flags & ~valid_flags) return -EINVAL; down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); if (s) { atomic_inc(&s->terminate); wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; up_read(&bnep_session_sem); return err; } static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s) { u32 valid_flags = BIT(BNEP_SETUP_RESPONSE); memset(ci, 0, sizeof(*ci)); memcpy(ci->dst, s->eh.h_source, ETH_ALEN); strcpy(ci->device, s->dev->name); ci->flags = s->flags & valid_flags; ci->state = s->state; ci->role = s->role; } int bnep_get_connlist(struct bnep_connlist_req *req) { struct bnep_session *s; int err = 0, n = 0; down_read(&bnep_session_sem); list_for_each_entry(s, &bnep_session_list, list) { struct bnep_conninfo ci; __bnep_copy_ci(&ci, s); if (copy_to_user(req->ci, &ci, sizeof(ci))) { err = -EFAULT; break; } if (++n >= req->cnum) break; req->ci++; } req->cnum = n; up_read(&bnep_session_sem); return err; } int bnep_get_conninfo(struct bnep_conninfo *ci) { struct bnep_session *s; int err = 0; down_read(&bnep_session_sem); s = __bnep_get_session(ci->dst); if (s) __bnep_copy_ci(ci, s); else err = -ENOENT; up_read(&bnep_session_sem); return err; } static int __init bnep_init(void) { char flt[50] = ""; #ifdef CONFIG_BT_BNEP_PROTO_FILTER strcat(flt, "protocol "); #endif #ifdef CONFIG_BT_BNEP_MC_FILTER strcat(flt, "multicast"); #endif BT_INFO("BNEP (Ethernet Emulation) ver %s", VERSION); if (flt[0]) BT_INFO("BNEP filters: %s", flt); return bnep_sock_init(); } static void __exit bnep_exit(void) { bnep_sock_cleanup(); } module_init(bnep_init); module_exit(bnep_exit); module_param(compress_src, bool, 0644); MODULE_PARM_DESC(compress_src, "Compress sources headers"); module_param(compress_dst, bool, 0644); MODULE_PARM_DESC(compress_dst, "Compress destination headers"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("bt-proto-4");
518 43 13 486 10 461 72 75 111 474 469 375 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 /* * Compatibility functions which bloat the callers too much to make inline. * All of the callers of these functions should be converted to use folios * eventually. */ #include <linux/migrate.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/swap.h> #include "internal.h" void unlock_page(struct page *page) { return folio_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page); void end_page_writeback(struct page *page) { return folio_end_writeback(page_folio(page)); } EXPORT_SYMBOL(end_page_writeback); void wait_on_page_writeback(struct page *page) { return folio_wait_writeback(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_on_page_writeback); void wait_for_stable_page(struct page *page) { return folio_wait_stable(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_for_stable_page); void mark_page_accessed(struct page *page) { folio_mark_accessed(page_folio(page)); } EXPORT_SYMBOL(mark_page_accessed); void set_page_writeback(struct page *page) { folio_start_writeback(page_folio(page)); } EXPORT_SYMBOL(set_page_writeback); bool set_page_dirty(struct page *page) { return folio_mark_dirty(page_folio(page)); } EXPORT_SYMBOL(set_page_dirty); int set_page_dirty_lock(struct page *page) { return folio_mark_dirty_lock(page_folio(page)); } EXPORT_SYMBOL(set_page_dirty_lock); bool clear_page_dirty_for_io(struct page *page) { return folio_clear_dirty_for_io(page_folio(page)); } EXPORT_SYMBOL(clear_page_dirty_for_io); bool redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { return folio_redirty_for_writepage(wbc, page_folio(page)); } EXPORT_SYMBOL(redirty_page_for_writepage); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp) { return filemap_add_folio(mapping, page_folio(page), index, gfp); } EXPORT_SYMBOL(add_to_page_cache_lru); noinline struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; folio = __filemap_get_folio(mapping, index, fgp_flags, gfp); if (IS_ERR(folio)) return NULL; return folio_file_page(folio, index); } EXPORT_SYMBOL(pagecache_get_page); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); } EXPORT_SYMBOL(grab_cache_page_write_begin);
6 1 5 1 4 4 3 2 2 1 1 1 1 6 3 1 1 2 2 2 2 4 4 1 1 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 // SPDX-License-Identifier: GPL-2.0-only /* * Vxlan vni filter for collect metadata mode * * Authors: Roopa Prabhu <roopa@nvidia.com> * */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/etherdevice.h> #include <linux/rhashtable.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/vxlan.h> #include "vxlan_private.h" static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct vxlan_vni_node *vnode = ptr; __be32 vni = *(__be32 *)arg->key; return vnode->vni != vni; } const struct rhashtable_params vxlan_vni_rht_params = { .head_offset = offsetof(struct vxlan_vni_node, vnode), .key_offset = offsetof(struct vxlan_vni_node, vni), .key_len = sizeof(__be32), .nelem_hint = 3, .max_size = VXLAN_N_VID, .obj_cmpfn = vxlan_vni_cmp, .automatic_shrinking = true, }; static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan, struct vxlan_vni_node *v, bool del) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_dev_node *node; struct vxlan_sock *vs; spin_lock(&vn->sock_lock); if (del) { if (!hlist_unhashed(&v->hlist4.hlist)) hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) if (!hlist_unhashed(&v->hlist6.hlist)) hlist_del_init_rcu(&v->hlist6.hlist); #endif goto out; } #if IS_ENABLED(CONFIG_IPV6) vs = rtnl_dereference(vxlan->vn6_sock); if (vs && v) { node = &v->hlist6; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } #endif vs = rtnl_dereference(vxlan->vn4_sock); if (vs && v) { node = &v->hlist4; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } out: spin_unlock(&vn->sock_lock); } void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, struct vxlan_sock *vs, bool ipv6) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_vni_node *v, *tmp; struct vxlan_dev_node *node; if (!vg) return; spin_lock(&vn->sock_lock); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { #if IS_ENABLED(CONFIG_IPV6) if (ipv6) node = &v->hlist6; else #endif node = &v->hlist4; node->vxlan = vxlan; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } spin_unlock(&vn->sock_lock); } void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan) { struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_vni_node *v, *tmp; if (!vg) return; spin_lock(&vn->sock_lock); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&v->hlist6.hlist); #endif } spin_unlock(&vn->sock_lock); } static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, struct vxlan_vni_stats *dest) { int i; memset(dest, 0, sizeof(*dest)); for_each_possible_cpu(i) { struct vxlan_vni_stats_pcpu *pstats; struct vxlan_vni_stats temp; unsigned int start; pstats = per_cpu_ptr(vninode->stats, i); do { start = u64_stats_fetch_begin(&pstats->syncp); memcpy(&temp, &pstats->stats, sizeof(temp)); } while (u64_stats_fetch_retry(&pstats->syncp, start)); dest->rx_packets += temp.rx_packets; dest->rx_bytes += temp.rx_bytes; dest->rx_drops += temp.rx_drops; dest->rx_errors += temp.rx_errors; dest->tx_packets += temp.tx_packets; dest->tx_bytes += temp.tx_bytes; dest->tx_drops += temp.tx_drops; dest->tx_errors += temp.tx_errors; } } static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode, int type, unsigned int len) { struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats); u64_stats_update_begin(&pstats->syncp); switch (type) { case VXLAN_VNI_STATS_RX: pstats->stats.rx_bytes += len; pstats->stats.rx_packets++; break; case VXLAN_VNI_STATS_RX_DROPS: pstats->stats.rx_drops++; break; case VXLAN_VNI_STATS_RX_ERRORS: pstats->stats.rx_errors++; break; case VXLAN_VNI_STATS_TX: pstats->stats.tx_bytes += len; pstats->stats.tx_packets++; break; case VXLAN_VNI_STATS_TX_DROPS: pstats->stats.tx_drops++; break; case VXLAN_VNI_STATS_TX_ERRORS: pstats->stats.tx_errors++; break; } u64_stats_update_end(&pstats->syncp); } void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni, struct vxlan_vni_node *vninode, int type, unsigned int len) { struct vxlan_vni_node *vnode; if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return; if (vninode) { vnode = vninode; } else { vnode = vxlan_vnifilter_lookup(vxlan, vni); if (!vnode) return; } vxlan_vnifilter_stats_add(vnode, type, len); } static u32 vnirange(struct vxlan_vni_node *vbegin, struct vxlan_vni_node *vend) { return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni)); } static size_t vxlan_vnifilter_entry_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct tunnel_msg)) + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */ + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */ } static int __vnifilter_entry_fill_stats(struct sk_buff *skb, const struct vxlan_vni_node *vbegin) { struct vxlan_vni_stats vstats; struct nlattr *vstats_attr; vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS); if (!vstats_attr) goto out_stats_err; vxlan_vnifilter_stats_get(vbegin, &vstats); if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES, vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS, vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS, vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS, vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES, vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS, vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS, vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS, vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD)) goto out_stats_err; nla_nest_end(skb, vstats_attr); return 0; out_stats_err: nla_nest_cancel(skb, vstats_attr); return -EMSGSIZE; } static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb, struct vxlan_vni_node *vbegin, struct vxlan_vni_node *vend, bool fill_stats) { struct nlattr *ventry; u32 vs = be32_to_cpu(vbegin->vni); u32 ve = 0; if (vbegin != vend) ve = be32_to_cpu(vend->vni); ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY); if (!ventry) return false; if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs)) goto out_err; if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve)) goto out_err; if (!vxlan_addr_any(&vbegin->remote_ip)) { if (vbegin->remote_ip.sa.sa_family == AF_INET) { if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP, vbegin->remote_ip.sin.sin_addr.s_addr)) goto out_err; #if IS_ENABLED(CONFIG_IPV6) } else { if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6, &vbegin->remote_ip.sin6.sin6_addr)) goto out_err; #endif } } if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin)) goto out_err; nla_nest_end(skb, ventry); return true; out_err: nla_nest_cancel(skb, ventry); return false; } static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode, int cmd) { struct tunnel_msg *tmsg; struct sk_buff *skb; struct nlmsghdr *nlh; struct net *net = dev_net(vxlan->dev); int err = -ENOBUFS; skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL); if (!skb) goto out_err; err = -EMSGSIZE; nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0); if (!nlh) goto out_err; tmsg = nlmsg_data(nlh); memset(tmsg, 0, sizeof(*tmsg)); tmsg->family = AF_BRIDGE; tmsg->ifindex = vxlan->dev->ifindex; if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false)) goto out_err; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL); return; out_err: rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err); kfree_skb(skb); } static int vxlan_vnifilter_dump_dev(const struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL; struct vxlan_dev *vxlan = netdev_priv(dev); struct tunnel_msg *new_tmsg, *tmsg; int idx = 0, s_idx = cb->args[1]; struct vxlan_vni_group *vg; struct nlmsghdr *nlh; bool dump_stats; int err = 0; if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return -EINVAL; /* RCU needed because of the vni locking rules (rcu || rtnl) */ vg = rcu_dereference(vxlan->vnigrp); if (!vg || !vg->num_vnis) return 0; tmsg = nlmsg_data(cb->nlh); dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; new_tmsg = nlmsg_data(nlh); memset(new_tmsg, 0, sizeof(*new_tmsg)); new_tmsg->family = PF_BRIDGE; new_tmsg->ifindex = dev->ifindex; list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (idx < s_idx) { idx++; continue; } if (!vbegin) { vbegin = v; vend = v; continue; } if (!dump_stats && vnirange(vend, v) == 1 && vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) { goto update_end; } else { if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) { err = -EMSGSIZE; break; } idx += vnirange(vbegin, vend) + 1; vbegin = v; } update_end: vend = v; } if (!err && vbegin) { if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) err = -EMSGSIZE; } cb->args[1] = err ? idx : 0; nlmsg_end(skb, nlh); return err; } static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, err = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); struct tunnel_msg *tmsg; struct net_device *dev; if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) { NL_SET_ERR_MSG(cb->extack, "Invalid msg length"); return -EINVAL; } tmsg = nlmsg_data(cb->nlh); if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header"); return -EINVAL; } rcu_read_lock(); if (tmsg->ifindex) { dev = dev_get_by_index_rcu(net, tmsg->ifindex); if (!dev) { err = -ENODEV; goto out_err; } if (!netif_is_vxlan(dev)) { NL_SET_ERR_MSG(cb->extack, "The device is not a vxlan device"); err = -EINVAL; goto out_err; } err = vxlan_vnifilter_dump_dev(dev, skb, cb); /* if the dump completed without an error we return 0 here */ if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { if (!netif_is_vxlan(dev)) continue; if (idx < s_idx) goto skip; err = vxlan_vnifilter_dump_dev(dev, skb, cb); if (err == -EMSGSIZE) break; skip: idx++; } } cb->args[0] = idx; rcu_read_unlock(); return skb->len; out_err: rcu_read_unlock(); return err; } static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = { [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 }, [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 }, [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY, .len = sizeof_field(struct iphdr, daddr) }, [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, }; static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = { [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED }, }; static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, union vxlan_addr *old_remote_ip, union vxlan_addr *remote_ip, struct netlink_ext_ack *extack) { struct vxlan_rdst *dst = &vxlan->default_dst; u32 hash_index; int err = 0; hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); if (remote_ip && !vxlan_addr_any(remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, vni, vni, dst->remote_ifindex, NTF_SELF, 0, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; } } if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) { __vxlan_fdb_delete(vxlan, all_zeros_mac, *old_remote_ip, vxlan->cfg.dst_port, vni, vni, dst->remote_ifindex, true); } spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; } static int vxlan_vni_update_group(struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode, union vxlan_addr *group, bool create, bool *changed, struct netlink_ext_ack *extack) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_rdst *dst = &vxlan->default_dst; union vxlan_addr *newrip = NULL, *oldrip = NULL; union vxlan_addr old_remote_ip; int ret = 0; memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip)); /* if per vni remote ip is not present use vxlan dev * default dst remote ip for fdb entry */ if (group && !vxlan_addr_any(group)) { newrip = group; } else { if (!vxlan_addr_any(&dst->remote_ip)) newrip = &dst->remote_ip; } /* if old rip exists, and no newrip, * explicitly delete old rip */ if (!newrip && !vxlan_addr_any(&old_remote_ip)) oldrip = &old_remote_ip; if (!newrip && !oldrip) return 0; if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip)) return 0; ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni, oldrip, newrip, extack); if (ret) goto out; if (group) memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip)); if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&old_remote_ip) && !vxlan_group_used(vn, vxlan, vninode->vni, &old_remote_ip, vxlan->default_dst.remote_ifindex)) { ret = vxlan_igmp_leave(vxlan, &old_remote_ip, 0); if (ret) goto out; } if (vxlan_addr_multicast(&vninode->remote_ip)) { ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0); if (ret == -EADDRINUSE) ret = 0; if (ret) goto out; } } *changed = true; return 0; out: return ret; } int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, union vxlan_addr *old_remote_ip, union vxlan_addr *new_remote_ip, struct netlink_ext_ack *extack) { struct list_head *headp, *hpos; struct vxlan_vni_group *vg; struct vxlan_vni_node *vent; int ret; vg = rtnl_dereference(vxlan->vnigrp); headp = &vg->vni_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct vxlan_vni_node, vlist); if (vxlan_addr_any(&vent->remote_ip)) { ret = vxlan_update_default_fdb_entry(vxlan, vent->vni, old_remote_ip, new_remote_ip, extack); if (ret) return ret; } } return 0; } static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_rdst *dst = &vxlan->default_dst; /* if per vni remote_ip not present, delete the * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || !vxlan_addr_any(&dst->remote_ip)) __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), vxlan->cfg.dst_port, vninode->vni, vninode->vni, dst->remote_ifindex, true); if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && !vxlan_group_used(vn, vxlan, vninode->vni, &vninode->remote_ip, dst->remote_ifindex)) { vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0); } } } static int vxlan_vni_update(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, __be32 vni, union vxlan_addr *group, bool *changed, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; int ret; vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni, vxlan_vni_rht_params); if (!vninode) return 0; ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed, extack); if (ret) return ret; if (changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return 0; } static void __vxlan_vni_add_list(struct vxlan_vni_group *vg, struct vxlan_vni_node *v) { struct list_head *headp, *hpos; struct vxlan_vni_node *vent; headp = &vg->vni_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct vxlan_vni_node, vlist); if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni)) continue; else break; } list_add_rcu(&v->vlist, hpos); vg->num_vnis++; } static void __vxlan_vni_del_list(struct vxlan_vni_group *vg, struct vxlan_vni_node *v) { list_del_rcu(&v->vlist); vg->num_vnis--; } static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan, __be32 vni) { struct vxlan_vni_node *vninode; vninode = kzalloc(sizeof(*vninode), GFP_KERNEL); if (!vninode) return NULL; vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu); if (!vninode->stats) { kfree(vninode); return NULL; } vninode->vni = vni; vninode->hlist4.vxlan = vxlan; #if IS_ENABLED(CONFIG_IPV6) vninode->hlist6.vxlan = vxlan; #endif return vninode; } static void vxlan_vni_free(struct vxlan_vni_node *vninode) { free_percpu(vninode->stats); kfree(vninode); } static int vxlan_vni_add(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, u32 vni, union vxlan_addr *group, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; __be32 v = cpu_to_be32(vni); bool changed = false; int err = 0; if (vxlan_vnifilter_lookup(vxlan, v)) return vxlan_vni_update(vxlan, vg, v, group, &changed, extack); err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v); if (err) { NL_SET_ERR_MSG(extack, "VNI in use"); return err; } vninode = vxlan_vni_alloc(vxlan, v); if (!vninode) return -ENOMEM; err = rhashtable_lookup_insert_fast(&vg->vni_hash, &vninode->vnode, vxlan_vni_rht_params); if (err) { vxlan_vni_free(vninode); return err; } __vxlan_vni_add_list(vg, vninode); if (vxlan->dev->flags & IFF_UP) vxlan_vs_add_del_vninode(vxlan, vninode, false); err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, extack); if (changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return err; } static void vxlan_vni_node_rcu_free(struct rcu_head *rcu) { struct vxlan_vni_node *v; v = container_of(rcu, struct vxlan_vni_node, rcu); vxlan_vni_free(v); } static int vxlan_vni_del(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, u32 vni, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; __be32 v = cpu_to_be32(vni); int err = 0; vg = rtnl_dereference(vxlan->vnigrp); vninode = rhashtable_lookup_fast(&vg->vni_hash, &v, vxlan_vni_rht_params); if (!vninode) { err = -ENOENT; goto out; } vxlan_vni_delete_group(vxlan, vninode); err = rhashtable_remove_fast(&vg->vni_hash, &vninode->vnode, vxlan_vni_rht_params); if (err) goto out; __vxlan_vni_del_list(vg, vninode); vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL); if (vxlan->dev->flags & IFF_UP) vxlan_vs_add_del_vninode(vxlan, vninode, true); call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free); return 0; out: return err; } static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni, __u32 end_vni, union vxlan_addr *group, int cmd, struct netlink_ext_ack *extack) { struct vxlan_vni_group *vg; int v, err = 0; vg = rtnl_dereference(vxlan->vnigrp); for (v = start_vni; v <= end_vni; v++) { switch (cmd) { case RTM_NEWTUNNEL: err = vxlan_vni_add(vxlan, vg, v, group, extack); break; case RTM_DELTUNNEL: err = vxlan_vni_del(vxlan, vg, v, extack); break; default: err = -EOPNOTSUPP; break; } if (err) goto out; } return 0; out: return err; } static int vxlan_process_vni_filter(struct vxlan_dev *vxlan, struct nlattr *nlvnifilter, int cmd, struct netlink_ext_ack *extack) { struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1]; u32 vni_start = 0, vni_end = 0; union vxlan_addr group; int err; err = nla_parse_nested(vattrs, VXLAN_VNIFILTER_ENTRY_MAX, nlvnifilter, vni_filter_entry_policy, extack); if (err) return err; if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) { vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]); vni_end = vni_start; } if (vattrs[VXLAN_VNIFILTER_ENTRY_END]) vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]); if (!vni_start && !vni_end) { NL_SET_ERR_MSG_ATTR(extack, nlvnifilter, "vni start nor end found in vni entry"); return -EINVAL; } if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) { group.sin.sin_addr.s_addr = nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]); group.sa.sa_family = AF_INET; } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) { group.sin6.sin6_addr = nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]); group.sa.sa_family = AF_INET6; } else { memset(&group, 0, sizeof(group)); } if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) { NL_SET_ERR_MSG(extack, "Local interface required for multicast remote group"); return -EINVAL; } err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd, extack); if (err) return err; return 0; } void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan) { struct vxlan_vni_node *v, *tmp; struct vxlan_vni_group *vg; vg = rtnl_dereference(vxlan->vnigrp); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { rhashtable_remove_fast(&vg->vni_hash, &v->vnode, vxlan_vni_rht_params); hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&v->hlist6.hlist); #endif __vxlan_vni_del_list(vg, v); vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL); call_rcu(&v->rcu, vxlan_vni_node_rcu_free); } rhashtable_destroy(&vg->vni_hash); kfree(vg); } int vxlan_vnigroup_init(struct vxlan_dev *vxlan) { struct vxlan_vni_group *vg; int ret; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) return -ENOMEM; ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params); if (ret) { kfree(vg); return ret; } INIT_LIST_HEAD(&vg->vni_list); rcu_assign_pointer(vxlan->vnigrp, vg); return 0; } static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tunnel_msg *tmsg; struct vxlan_dev *vxlan; struct net_device *dev; struct nlattr *attr; int err, vnis = 0; int rem; /* this should validate the header and check for remaining bytes */ err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX, vni_filter_policy, extack); if (err < 0) return err; tmsg = nlmsg_data(nlh); dev = __dev_get_by_index(net, tmsg->ifindex); if (!dev) return -ENODEV; if (!netif_is_vxlan(dev)) { NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device"); return -EINVAL; } vxlan = netdev_priv(dev); if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return -EOPNOTSUPP; nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) { switch (nla_type(attr)) { case VXLAN_VNIFILTER_ENTRY: err = vxlan_process_vni_filter(vxlan, attr, nlh->nlmsg_type, extack); break; default: continue; } vnis++; if (err) break; } if (!vnis) { NL_SET_ERR_MSG_MOD(extack, "No vnis found to process"); err = -EINVAL; } return err; } static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = { {THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0}, {THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0}, {THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0}, }; int vxlan_vnifilter_init(void) { return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers); } void vxlan_vnifilter_uninit(void) { rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers); }
20 20 20 20 20 20 20 20 20 20 20 20 8 1 1 7 7 7 7 7 7 7 3 7 62 62 62 21 21 7 1 1 7 7 7 7 7 45 45 44 46 45 45 46 46 46 25 25 24 25 13 25 25 25 25 8 36 32 36 38 38 16 16 38 11 11 11 10 10 10 196 195 196 196 38 38 38 38 3 9 9 9 9 9 9 40 40 40 39 38 1 1 38 38 38 38 38 38 40 25 25 25 25 25 25 28 28 28 9 28 28 28 28 28 3 6 6 6 7 7 5 4 4 3 70 70 238 238 237 52 183 10 183 1 1 1 1 1 1 1 1 1 1 1 1 5 5 4 4 1 1 3 3 2 5 5 4 23 23 11 22 1 1 1 1 1 1 1 9 9 9 9 1 1 1 8 11 11 9 12 11 12 11 11 11 9 18 18 17 17 17 18 19 1 18 18 18 18 17 12 1 12 12 11 1 11 1 10 7 5 13 13 14 1 13 13 13 13 10 2 2 1 1 1 4 4 3 3 2 1 1 1 1 1 1 1 1 9 9 9 8 8 1 7 7 5 4 3 3 11 2 11 27 28 27 26 4 23 6 23 14 2 12 23 21 21 21 21 21 21 9 3 6 15 15 7 15 13 15 2 15 8 13 13 28 27 13 2 28 1 1 4 4 1 3 1 3 1 2 2 2 2 7 7 7 7 6 5 4 3 3 3 2 144 144 3 1 5 12 19 14 2 4 25 5 1 4 2 2 3 3 2 5 7 144 1 45 34 45 44 1 7 7 7 6 6 6 4 3 3 3 7 3 2 4 4 1 3 4 5 12 12 7 7 2 119 3 19 18 119 163 163 163 162 162 2 162 161 178 178 19 178 177 180 181 181 181 181 1 17 2 2 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 // SPDX-License-Identifier: GPL-2.0-or-later /* * Routines for driver control interface * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/threads.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/math64.h> #include <linux/sched/signal.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> #include <sound/control.h> // Max allocation size for user controls. static int max_user_ctl_alloc_size = 8 * 1024 * 1024; module_param_named(max_user_ctl_alloc_size, max_user_ctl_alloc_size, int, 0444); MODULE_PARM_DESC(max_user_ctl_alloc_size, "Max allocation size for user controls"); #define MAX_CONTROL_COUNT 1028 struct snd_kctl_ioctl { struct list_head list; /* list of all ioctls */ snd_kctl_ioctl_func_t fioctl; }; static DECLARE_RWSEM(snd_ioctl_rwsem); static DECLARE_RWSEM(snd_ctl_layer_rwsem); static LIST_HEAD(snd_control_ioctls); #ifdef CONFIG_COMPAT static LIST_HEAD(snd_control_compat_ioctls); #endif static struct snd_ctl_layer_ops *snd_ctl_layer; static int snd_ctl_remove_locked(struct snd_card *card, struct snd_kcontrol *kcontrol); static int snd_ctl_open(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_ctl_file *ctl; int i, err; err = stream_open(inode, file); if (err < 0) return err; card = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_CONTROL); if (!card) { err = -ENODEV; goto __error1; } err = snd_card_file_add(card, file); if (err < 0) { err = -ENODEV; goto __error1; } if (!try_module_get(card->module)) { err = -EFAULT; goto __error2; } ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); if (ctl == NULL) { err = -ENOMEM; goto __error; } INIT_LIST_HEAD(&ctl->events); init_waitqueue_head(&ctl->change_sleep); spin_lock_init(&ctl->read_lock); ctl->card = card; for (i = 0; i < SND_CTL_SUBDEV_ITEMS; i++) ctl->preferred_subdevice[i] = -1; ctl->pid = get_pid(task_pid(current)); file->private_data = ctl; scoped_guard(write_lock_irqsave, &card->controls_rwlock) list_add_tail(&ctl->list, &card->ctl_files); snd_card_unref(card); return 0; __error: module_put(card->module); __error2: snd_card_file_remove(card, file); __error1: if (card) snd_card_unref(card); return err; } static void snd_ctl_empty_read_queue(struct snd_ctl_file * ctl) { struct snd_kctl_event *cread; guard(spinlock_irqsave)(&ctl->read_lock); while (!list_empty(&ctl->events)) { cread = snd_kctl_event(ctl->events.next); list_del(&cread->list); kfree(cread); } } static int snd_ctl_release(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_ctl_file *ctl; struct snd_kcontrol *control; unsigned int idx; ctl = file->private_data; file->private_data = NULL; card = ctl->card; scoped_guard(write_lock_irqsave, &card->controls_rwlock) list_del(&ctl->list); scoped_guard(rwsem_write, &card->controls_rwsem) { list_for_each_entry(control, &card->controls, list) for (idx = 0; idx < control->count; idx++) if (control->vd[idx].owner == ctl) control->vd[idx].owner = NULL; } snd_fasync_free(ctl->fasync); snd_ctl_empty_read_queue(ctl); put_pid(ctl->pid); kfree(ctl); module_put(card->module); snd_card_file_remove(card, file); return 0; } /** * snd_ctl_notify - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @id: the ctl element id to send notification * * This function adds an event record with the given id and mask, appends * to the list and wakes up the user-space for notification. This can be * called in the atomic context. */ void snd_ctl_notify(struct snd_card *card, unsigned int mask, struct snd_ctl_elem_id *id) { struct snd_ctl_file *ctl; struct snd_kctl_event *ev; if (snd_BUG_ON(!card || !id)) return; if (card->shutdown) return; guard(read_lock_irqsave)(&card->controls_rwlock); #if IS_ENABLED(CONFIG_SND_MIXER_OSS) card->mixer_oss_change_count++; #endif list_for_each_entry(ctl, &card->ctl_files, list) { if (!ctl->subscribed) continue; scoped_guard(spinlock, &ctl->read_lock) { list_for_each_entry(ev, &ctl->events, list) { if (ev->id.numid == id->numid) { ev->mask |= mask; goto _found; } } ev = kzalloc(sizeof(*ev), GFP_ATOMIC); if (ev) { ev->id = *id; ev->mask = mask; list_add_tail(&ev->list, &ctl->events); } else { dev_err(card->dev, "No memory available to allocate event\n"); } _found: wake_up(&ctl->change_sleep); } snd_kill_fasync(ctl->fasync, SIGIO, POLL_IN); } } EXPORT_SYMBOL(snd_ctl_notify); /** * snd_ctl_notify_one - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @kctl: the pointer with the control instance * @ioff: the additional offset to the control index * * This function calls snd_ctl_notify() and does additional jobs * like LED state changes. */ void snd_ctl_notify_one(struct snd_card *card, unsigned int mask, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_ctl_elem_id id = kctl->id; struct snd_ctl_layer_ops *lops; id.index += ioff; id.numid += ioff; snd_ctl_notify(card, mask, &id); guard(rwsem_read)(&snd_ctl_layer_rwsem); for (lops = snd_ctl_layer; lops; lops = lops->next) lops->lnotify(card, mask, kctl, ioff); } EXPORT_SYMBOL(snd_ctl_notify_one); /** * snd_ctl_new - create a new control instance with some elements * @kctl: the pointer to store new control instance * @count: the number of elements in this control * @access: the default access flags for elements in this control * @file: given when locking these elements * * Allocates a memory object for a new control instance. The instance has * elements as many as the given number (@count). Each element has given * access permissions (@access). Each element is locked when @file is given. * * Return: 0 on success, error code on failure */ static int snd_ctl_new(struct snd_kcontrol **kctl, unsigned int count, unsigned int access, struct snd_ctl_file *file) { unsigned int idx; if (count == 0 || count > MAX_CONTROL_COUNT) return -EINVAL; *kctl = kzalloc(struct_size(*kctl, vd, count), GFP_KERNEL); if (!*kctl) return -ENOMEM; (*kctl)->count = count; for (idx = 0; idx < count; idx++) { (*kctl)->vd[idx].access = access; (*kctl)->vd[idx].owner = file; } return 0; } /** * snd_ctl_new1 - create a control instance from the template * @ncontrol: the initialization record * @private_data: the private data to set * * Allocates a new struct snd_kcontrol instance and initialize from the given * template. When the access field of ncontrol is 0, it's assumed as * READWRITE access. When the count field is 0, it's assumes as one. * * Return: The pointer of the newly generated instance, or %NULL on failure. */ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol, void *private_data) { struct snd_kcontrol *kctl; unsigned int count; unsigned int access; int err; if (snd_BUG_ON(!ncontrol || !ncontrol->info)) return NULL; count = ncontrol->count; if (count == 0) count = 1; access = ncontrol->access; if (access == 0) access = SNDRV_CTL_ELEM_ACCESS_READWRITE; access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_VOLATILE | SNDRV_CTL_ELEM_ACCESS_INACTIVE | SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND | SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK | SNDRV_CTL_ELEM_ACCESS_LED_MASK | SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK); err = snd_ctl_new(&kctl, count, access, NULL); if (err < 0) return NULL; /* The 'numid' member is decided when calling snd_ctl_add(). */ kctl->id.iface = ncontrol->iface; kctl->id.device = ncontrol->device; kctl->id.subdevice = ncontrol->subdevice; if (ncontrol->name) { strscpy(kctl->id.name, ncontrol->name, sizeof(kctl->id.name)); if (strcmp(ncontrol->name, kctl->id.name) != 0) pr_warn("ALSA: Control name '%s' truncated to '%s'\n", ncontrol->name, kctl->id.name); } kctl->id.index = ncontrol->index; kctl->info = ncontrol->info; kctl->get = ncontrol->get; kctl->put = ncontrol->put; kctl->tlv.p = ncontrol->tlv.p; kctl->private_value = ncontrol->private_value; kctl->private_data = private_data; return kctl; } EXPORT_SYMBOL(snd_ctl_new1); /** * snd_ctl_free_one - release the control instance * @kcontrol: the control instance * * Releases the control instance created via snd_ctl_new() * or snd_ctl_new1(). * Don't call this after the control was added to the card. */ void snd_ctl_free_one(struct snd_kcontrol *kcontrol) { if (kcontrol) { if (kcontrol->private_free) kcontrol->private_free(kcontrol); kfree(kcontrol); } } EXPORT_SYMBOL(snd_ctl_free_one); static bool snd_ctl_remove_numid_conflict(struct snd_card *card, unsigned int count) { struct snd_kcontrol *kctl; /* Make sure that the ids assigned to the control do not wrap around */ if (card->last_numid >= UINT_MAX - count) card->last_numid = 0; list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid < card->last_numid + 1 + count && kctl->id.numid + kctl->count > card->last_numid + 1) { card->last_numid = kctl->id.numid + kctl->count - 1; return true; } } return false; } static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) { unsigned int iter = 100000; while (snd_ctl_remove_numid_conflict(card, count)) { if (--iter == 0) { /* this situation is very unlikely */ dev_err(card->dev, "unable to allocate new control numid\n"); return -ENOMEM; } } return 0; } /* check whether the given id is contained in the given kctl */ static bool elem_id_matches(const struct snd_kcontrol *kctl, const struct snd_ctl_elem_id *id) { return kctl->id.iface == id->iface && kctl->id.device == id->device && kctl->id.subdevice == id->subdevice && !strncmp(kctl->id.name, id->name, sizeof(kctl->id.name)) && kctl->id.index <= id->index && kctl->id.index + kctl->count > id->index; } #ifdef CONFIG_SND_CTL_FAST_LOOKUP /* Compute a hash key for the corresponding ctl id * It's for the name lookup, hence the numid is excluded. * The hash key is bound in LONG_MAX to be used for Xarray key. */ #define MULTIPLIER 37 static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) { int i; unsigned long h; h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; for (i = 0; i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN && id->name[i]; i++) h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; return h; } /* add hash entries to numid and ctl xarray tables */ static void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; int i; xa_store_range(&card->ctl_numids, kcontrol->id.numid, kcontrol->id.numid + kcontrol->count - 1, kcontrol, GFP_KERNEL); for (i = 0; i < kcontrol->count; i++) { id.index = kcontrol->id.index + i; if (xa_insert(&card->ctl_hash, get_ctl_id_hash(&id), kcontrol, GFP_KERNEL)) { /* skip hash for this entry, noting we had collision */ card->ctl_hash_collision = true; dev_dbg(card->dev, "ctl_hash collision %d:%s:%d\n", id.iface, id.name, id.index); } } } /* remove hash entries that have been added */ static void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; struct snd_kcontrol *matched; unsigned long h; int i; for (i = 0; i < kcontrol->count; i++) { xa_erase(&card->ctl_numids, id.numid); h = get_ctl_id_hash(&id); matched = xa_load(&card->ctl_hash, h); if (matched && (matched == kcontrol || elem_id_matches(matched, &id))) xa_erase(&card->ctl_hash, h); id.index++; id.numid++; } } #else /* CONFIG_SND_CTL_FAST_LOOKUP */ static inline void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } static inline void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } #endif /* CONFIG_SND_CTL_FAST_LOOKUP */ enum snd_ctl_add_mode { CTL_ADD_EXCLUSIVE, CTL_REPLACE, CTL_ADD_ON_REPLACE, }; /* add/replace a new kcontrol object; call with card->controls_rwsem locked */ static int __snd_ctl_add_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, enum snd_ctl_add_mode mode) { struct snd_ctl_elem_id id; unsigned int idx; struct snd_kcontrol *old; int err; lockdep_assert_held_write(&card->controls_rwsem); id = kcontrol->id; if (id.index > UINT_MAX - kcontrol->count) return -EINVAL; old = snd_ctl_find_id(card, &id); if (!old) { if (mode == CTL_REPLACE) return -EINVAL; } else { if (mode == CTL_ADD_EXCLUSIVE) { dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", id.iface, id.device, id.subdevice, id.name, id.index); return -EBUSY; } err = snd_ctl_remove_locked(card, old); if (err < 0) return err; } if (snd_ctl_find_hole(card, kcontrol->count) < 0) return -ENOMEM; scoped_guard(write_lock_irq, &card->controls_rwlock) { list_add_tail(&kcontrol->list, &card->controls); card->controls_count += kcontrol->count; kcontrol->id.numid = card->last_numid + 1; card->last_numid += kcontrol->count; } add_hash_entries(card, kcontrol); for (idx = 0; idx < kcontrol->count; idx++) snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_ADD, kcontrol, idx); return 0; } static int snd_ctl_add_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, enum snd_ctl_add_mode mode) { int err = -EINVAL; if (! kcontrol) return err; if (snd_BUG_ON(!card || !kcontrol->info)) goto error; scoped_guard(rwsem_write, &card->controls_rwsem) err = __snd_ctl_add_replace(card, kcontrol, mode); if (err < 0) goto error; return 0; error: snd_ctl_free_one(kcontrol); return err; } /** * snd_ctl_add - add the control instance to the card * @card: the card instance * @kcontrol: the control instance to add * * Adds the control instance created via snd_ctl_new() or * snd_ctl_new1() to the given card. Assigns also an unique * numid used for fast search. * * It frees automatically the control which cannot be added. * * Return: Zero if successful, or a negative error code on failure. * */ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { return snd_ctl_add_replace(card, kcontrol, CTL_ADD_EXCLUSIVE); } EXPORT_SYMBOL(snd_ctl_add); /** * snd_ctl_replace - replace the control instance of the card * @card: the card instance * @kcontrol: the control instance to replace * @add_on_replace: add the control if not already added * * Replaces the given control. If the given control does not exist * and the add_on_replace flag is set, the control is added. If the * control exists, it is destroyed first. * * It frees automatically the control which cannot be added or replaced. * * Return: Zero if successful, or a negative error code on failure. */ int snd_ctl_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, bool add_on_replace) { return snd_ctl_add_replace(card, kcontrol, add_on_replace ? CTL_ADD_ON_REPLACE : CTL_REPLACE); } EXPORT_SYMBOL(snd_ctl_replace); static int __snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol, bool remove_hash) { unsigned int idx; lockdep_assert_held_write(&card->controls_rwsem); if (snd_BUG_ON(!card || !kcontrol)) return -EINVAL; if (remove_hash) remove_hash_entries(card, kcontrol); scoped_guard(write_lock_irq, &card->controls_rwlock) { list_del(&kcontrol->list); card->controls_count -= kcontrol->count; } for (idx = 0; idx < kcontrol->count; idx++) snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_REMOVE, kcontrol, idx); snd_ctl_free_one(kcontrol); return 0; } static inline int snd_ctl_remove_locked(struct snd_card *card, struct snd_kcontrol *kcontrol) { return __snd_ctl_remove(card, kcontrol, true); } /** * snd_ctl_remove - remove the control from the card and release it * @card: the card instance * @kcontrol: the control instance to remove * * Removes the control from the card and then releases the instance. * You don't need to call snd_ctl_free_one(). * Passing NULL to @kcontrol argument is allowed as noop. * * Return: 0 if successful, or a negative error code on failure. * * Note that this function takes card->controls_rwsem lock internally. */ int snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol) { if (!kcontrol) return 0; guard(rwsem_write)(&card->controls_rwsem); return snd_ctl_remove_locked(card, kcontrol); } EXPORT_SYMBOL(snd_ctl_remove); /** * snd_ctl_remove_id - remove the control of the given id and release it * @card: the card instance * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ int snd_ctl_remove_id(struct snd_card *card, struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) return -ENOENT; return snd_ctl_remove_locked(card, kctl); } EXPORT_SYMBOL(snd_ctl_remove_id); /** * snd_ctl_remove_user_ctl - remove and release the unlocked user control * @file: active control handle * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ static int snd_ctl_remove_user_ctl(struct snd_ctl_file * file, struct snd_ctl_elem_id *id) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; int idx; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) return -ENOENT; if (!(kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_USER)) return -EINVAL; for (idx = 0; idx < kctl->count; idx++) if (kctl->vd[idx].owner != NULL && kctl->vd[idx].owner != file) return -EBUSY; return snd_ctl_remove_locked(card, kctl); } /** * snd_ctl_activate_id - activate/inactivate the control of the given id * @card: the card instance * @id: the control id to activate/inactivate * @active: non-zero to activate * * Finds the control instance with the given id, and activate or * inactivate the control together with notification, if changed. * The given ID data is filled with full information. * * Return: 0 if unchanged, 1 if changed, or a negative error code on failure. */ int snd_ctl_activate_id(struct snd_card *card, struct snd_ctl_elem_id *id, int active) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int ret; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) { ret = -ENOENT; goto unlock; } index_offset = snd_ctl_get_ioff(kctl, id); vd = &kctl->vd[index_offset]; ret = 0; if (active) { if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE)) goto unlock; vd->access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; } else { if (vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE) goto unlock; vd->access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; } snd_ctl_build_ioff(id, kctl, index_offset); downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_INFO, kctl, index_offset); up_read(&card->controls_rwsem); return 1; unlock: up_write(&card->controls_rwsem); return ret; } EXPORT_SYMBOL_GPL(snd_ctl_activate_id); /** * snd_ctl_rename_id - replace the id of a control on the card * @card: the card instance * @src_id: the old id * @dst_id: the new id * * Finds the control with the old id from the card, and replaces the * id with the new one. * * The function tries to keep the already assigned numid while replacing * the rest. * * Note that this function should be used only in the card initialization * phase. Calling after the card instantiation may cause issues with * user-space expecting persistent numids. * * Return: Zero if successful, or a negative error code on failure. */ int snd_ctl_rename_id(struct snd_card *card, struct snd_ctl_elem_id *src_id, struct snd_ctl_elem_id *dst_id) { struct snd_kcontrol *kctl; int saved_numid; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, src_id); if (kctl == NULL) return -ENOENT; saved_numid = kctl->id.numid; remove_hash_entries(card, kctl); kctl->id = *dst_id; kctl->id.numid = saved_numid; add_hash_entries(card, kctl); return 0; } EXPORT_SYMBOL(snd_ctl_rename_id); /** * snd_ctl_rename - rename the control on the card * @card: the card instance * @kctl: the control to rename * @name: the new name * * Renames the specified control on the card to the new name. * * Note that this function takes card->controls_rwsem lock internally. */ void snd_ctl_rename(struct snd_card *card, struct snd_kcontrol *kctl, const char *name) { guard(rwsem_write)(&card->controls_rwsem); remove_hash_entries(card, kctl); if (strscpy(kctl->id.name, name, sizeof(kctl->id.name)) < 0) pr_warn("ALSA: Renamed control new name '%s' truncated to '%s'\n", name, kctl->id.name); add_hash_entries(card, kctl); } EXPORT_SYMBOL(snd_ctl_rename); #ifndef CONFIG_SND_CTL_FAST_LOOKUP static struct snd_kcontrol * snd_ctl_find_numid_slow(struct snd_card *card, unsigned int numid) { struct snd_kcontrol *kctl; guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid <= numid && kctl->id.numid + kctl->count > numid) return kctl; } return NULL; } #endif /* !CONFIG_SND_CTL_FAST_LOOKUP */ /** * snd_ctl_find_numid - find the control instance with the given number-id * @card: the card instance * @numid: the number-id to search * * Finds the control instance with the given number-id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwlock lock internally. */ struct snd_kcontrol *snd_ctl_find_numid(struct snd_card *card, unsigned int numid) { if (snd_BUG_ON(!card || !numid)) return NULL; #ifdef CONFIG_SND_CTL_FAST_LOOKUP return xa_load(&card->ctl_numids, numid); #else return snd_ctl_find_numid_slow(card, numid); #endif } EXPORT_SYMBOL(snd_ctl_find_numid); /** * snd_ctl_find_id - find the control instance with the given id * @card: the card instance * @id: the id to search * * Finds the control instance with the given id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwlock lock internally. */ struct snd_kcontrol *snd_ctl_find_id(struct snd_card *card, const struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; if (snd_BUG_ON(!card || !id)) return NULL; if (id->numid != 0) return snd_ctl_find_numid(card, id->numid); #ifdef CONFIG_SND_CTL_FAST_LOOKUP kctl = xa_load(&card->ctl_hash, get_ctl_id_hash(id)); if (kctl && elem_id_matches(kctl, id)) return kctl; if (!card->ctl_hash_collision) return NULL; /* we can rely on only hash table */ #endif /* no matching in hash table - try all as the last resort */ guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->controls, list) if (elem_id_matches(kctl, id)) return kctl; return NULL; } EXPORT_SYMBOL(snd_ctl_find_id); static int snd_ctl_card_info(struct snd_card *card, struct snd_ctl_file * ctl, unsigned int cmd, void __user *arg) { struct snd_ctl_card_info *info __free(kfree) = NULL; info = kzalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; scoped_guard(rwsem_read, &snd_ioctl_rwsem) { info->card = card->number; strscpy(info->id, card->id, sizeof(info->id)); strscpy(info->driver, card->driver, sizeof(info->driver)); strscpy(info->name, card->shortname, sizeof(info->name)); strscpy(info->longname, card->longname, sizeof(info->longname)); strscpy(info->mixername, card->mixername, sizeof(info->mixername)); strscpy(info->components, card->components, sizeof(info->components)); } if (copy_to_user(arg, info, sizeof(struct snd_ctl_card_info))) return -EFAULT; return 0; } static int snd_ctl_elem_list(struct snd_card *card, struct snd_ctl_elem_list *list) { struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; unsigned int offset, space, jidx; offset = list->offset; space = list->space; guard(rwsem_read)(&card->controls_rwsem); list->count = card->controls_count; list->used = 0; if (!space) return 0; list_for_each_entry(kctl, &card->controls, list) { if (offset >= kctl->count) { offset -= kctl->count; continue; } for (jidx = offset; jidx < kctl->count; jidx++) { snd_ctl_build_ioff(&id, kctl, jidx); if (copy_to_user(list->pids + list->used, &id, sizeof(id))) return -EFAULT; list->used++; if (!--space) return 0; } offset = 0; } return 0; } static int snd_ctl_elem_list_user(struct snd_card *card, struct snd_ctl_elem_list __user *_list) { struct snd_ctl_elem_list list; int err; if (copy_from_user(&list, _list, sizeof(list))) return -EFAULT; err = snd_ctl_elem_list(card, &list); if (err) return err; if (copy_to_user(_list, &list, sizeof(list))) return -EFAULT; return 0; } /* Check whether the given kctl info is valid */ static int snd_ctl_check_elem_info(struct snd_card *card, const struct snd_ctl_elem_info *info) { static const unsigned int max_value_counts[] = { [SNDRV_CTL_ELEM_TYPE_BOOLEAN] = 128, [SNDRV_CTL_ELEM_TYPE_INTEGER] = 128, [SNDRV_CTL_ELEM_TYPE_ENUMERATED] = 128, [SNDRV_CTL_ELEM_TYPE_BYTES] = 512, [SNDRV_CTL_ELEM_TYPE_IEC958] = 1, [SNDRV_CTL_ELEM_TYPE_INTEGER64] = 64, }; if (info->type < SNDRV_CTL_ELEM_TYPE_BOOLEAN || info->type > SNDRV_CTL_ELEM_TYPE_INTEGER64) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: invalid type %d\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index, info->type); return -EINVAL; } if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED && info->value.enumerated.items == 0) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: zero enum items\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index); return -EINVAL; } if (info->count > max_value_counts[info->type]) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: invalid count %d\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index, info->count); return -EINVAL; } return 0; } /* The capacity of struct snd_ctl_elem_value.value.*/ static const unsigned int value_sizes[] = { [SNDRV_CTL_ELEM_TYPE_BOOLEAN] = sizeof(long), [SNDRV_CTL_ELEM_TYPE_INTEGER] = sizeof(long), [SNDRV_CTL_ELEM_TYPE_ENUMERATED] = sizeof(unsigned int), [SNDRV_CTL_ELEM_TYPE_BYTES] = sizeof(unsigned char), [SNDRV_CTL_ELEM_TYPE_IEC958] = sizeof(struct snd_aes_iec958), [SNDRV_CTL_ELEM_TYPE_INTEGER64] = sizeof(long long), }; /* fill the remaining snd_ctl_elem_value data with the given pattern */ static void fill_remaining_elem_value(struct snd_ctl_elem_value *control, struct snd_ctl_elem_info *info, u32 pattern) { size_t offset = value_sizes[info->type] * info->count; offset = DIV_ROUND_UP(offset, sizeof(u32)); memset32((u32 *)control->value.bytes.data + offset, pattern, sizeof(control->value) / sizeof(u32) - offset); } /* check whether the given integer ctl value is valid */ static int sanity_check_int_value(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, int i, bool print_error) { long long lval, lmin, lmax, lstep; u64 rem; switch (info->type) { default: case SNDRV_CTL_ELEM_TYPE_BOOLEAN: lval = control->value.integer.value[i]; lmin = 0; lmax = 1; lstep = 0; break; case SNDRV_CTL_ELEM_TYPE_INTEGER: lval = control->value.integer.value[i]; lmin = info->value.integer.min; lmax = info->value.integer.max; lstep = info->value.integer.step; break; case SNDRV_CTL_ELEM_TYPE_INTEGER64: lval = control->value.integer64.value[i]; lmin = info->value.integer64.min; lmax = info->value.integer64.max; lstep = info->value.integer64.step; break; case SNDRV_CTL_ELEM_TYPE_ENUMERATED: lval = control->value.enumerated.item[i]; lmin = 0; lmax = info->value.enumerated.items - 1; lstep = 0; break; } if (lval < lmin || lval > lmax) { if (print_error) dev_err(card->dev, "control %i:%i:%i:%s:%i: value out of range %lld (%lld/%lld) at count %i\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index, lval, lmin, lmax, i); return -EINVAL; } if (lstep) { div64_u64_rem(lval, lstep, &rem); if (rem) { if (print_error) dev_err(card->dev, "control %i:%i:%i:%s:%i: unaligned value %lld (step %lld) at count %i\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index, lval, lstep, i); return -EINVAL; } } return 0; } /* check whether the all input values are valid for the given elem value */ static int sanity_check_input_values(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, bool print_error) { int i, ret; switch (info->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: case SNDRV_CTL_ELEM_TYPE_INTEGER: case SNDRV_CTL_ELEM_TYPE_INTEGER64: case SNDRV_CTL_ELEM_TYPE_ENUMERATED: for (i = 0; i < info->count; i++) { ret = sanity_check_int_value(card, control, info, i, print_error); if (ret < 0) return ret; } break; default: break; } return 0; } /* perform sanity checks to the given snd_ctl_elem_value object */ static int sanity_check_elem_value(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, u32 pattern) { size_t offset; int ret; u32 *p; ret = sanity_check_input_values(card, control, info, true); if (ret < 0) return ret; /* check whether the remaining area kept untouched */ offset = value_sizes[info->type] * info->count; offset = DIV_ROUND_UP(offset, sizeof(u32)); p = (u32 *)control->value.bytes.data + offset; for (; offset < sizeof(control->value) / sizeof(u32); offset++, p++) { if (*p != pattern) { ret = -EINVAL; break; } *p = 0; /* clear the checked area */ } return ret; } static int __snd_ctl_elem_info(struct snd_card *card, struct snd_kcontrol *kctl, struct snd_ctl_elem_info *info, struct snd_ctl_file *ctl) { struct snd_kcontrol_volatile *vd; unsigned int index_offset; int result; #ifdef CONFIG_SND_DEBUG info->access = 0; #endif result = kctl->info(kctl, info); if (result >= 0) { snd_BUG_ON(info->access); index_offset = snd_ctl_get_ioff(kctl, &info->id); vd = &kctl->vd[index_offset]; snd_ctl_build_ioff(&info->id, kctl, index_offset); info->access = vd->access; if (vd->owner) { info->access |= SNDRV_CTL_ELEM_ACCESS_LOCK; if (vd->owner == ctl) info->access |= SNDRV_CTL_ELEM_ACCESS_OWNER; info->owner = pid_vnr(vd->owner->pid); } else { info->owner = -1; } if (!snd_ctl_skip_validation(info) && snd_ctl_check_elem_info(card, info) < 0) result = -EINVAL; } return result; } static int snd_ctl_elem_info(struct snd_ctl_file *ctl, struct snd_ctl_elem_info *info) { struct snd_card *card = ctl->card; struct snd_kcontrol *kctl; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &info->id); if (!kctl) return -ENOENT; return __snd_ctl_elem_info(card, kctl, info, ctl); } static int snd_ctl_elem_info_user(struct snd_ctl_file *ctl, struct snd_ctl_elem_info __user *_info) { struct snd_card *card = ctl->card; struct snd_ctl_elem_info info; int result; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; result = snd_power_ref_and_wait(card); if (result) return result; result = snd_ctl_elem_info(ctl, &info); snd_power_unref(card); if (result < 0) return result; /* drop internal access flags */ info.access &= ~(SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK| SNDRV_CTL_ELEM_ACCESS_LED_MASK); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return result; } static int snd_ctl_elem_read(struct snd_card *card, struct snd_ctl_elem_value *control) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; struct snd_ctl_elem_info info; const u32 pattern = 0xdeadbeef; int ret; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); if (!kctl) return -ENOENT; index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || !kctl->get) return -EPERM; snd_ctl_build_ioff(&control->id, kctl, index_offset); #ifdef CONFIG_SND_CTL_DEBUG /* info is needed only for validation */ memset(&info, 0, sizeof(info)); info.id = control->id; ret = __snd_ctl_elem_info(card, kctl, &info, NULL); if (ret < 0) return ret; #endif if (!snd_ctl_skip_validation(&info)) fill_remaining_elem_value(control, &info, pattern); ret = kctl->get(kctl, control); if (ret < 0) return ret; if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, "control %i:%i:%i:%s:%i: access overflow\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); return -EINVAL; } return 0; } static int snd_ctl_elem_read_user(struct snd_card *card, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(control); result = snd_power_ref_and_wait(card); if (result) return result; result = snd_ctl_elem_read(card, control); snd_power_unref(card); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file, struct snd_ctl_elem_value *control) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int result = 0; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); if (kctl == NULL) { up_write(&card->controls_rwsem); return -ENOENT; } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) || kctl->put == NULL || (file && vd->owner && vd->owner != file)) { up_write(&card->controls_rwsem); return -EPERM; } snd_ctl_build_ioff(&control->id, kctl, index_offset); /* validate input values */ if (IS_ENABLED(CONFIG_SND_CTL_INPUT_VALIDATION)) { struct snd_ctl_elem_info info; memset(&info, 0, sizeof(info)); info.id = control->id; result = __snd_ctl_elem_info(card, kctl, &info, NULL); if (!result) result = sanity_check_input_values(card, control, &info, false); } if (!result) result = kctl->put(kctl, control); if (result < 0) { up_write(&card->controls_rwsem); return result; } if (result > 0) { downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_VALUE, kctl, index_offset); up_read(&card->controls_rwsem); } else { up_write(&card->controls_rwsem); } return 0; } static int snd_ctl_elem_write_user(struct snd_ctl_file *file, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; struct snd_card *card; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(control); card = file->card; result = snd_power_ref_and_wait(card); if (result < 0) return result; result = snd_ctl_elem_write(card, file, control); snd_power_unref(card); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_lock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &id); if (!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->owner) return -EBUSY; vd->owner = file; return 0; } static int snd_ctl_elem_unlock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &id); if (!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (!vd->owner) return -EINVAL; if (vd->owner != file) return -EPERM; vd->owner = NULL; return 0; } struct user_element { struct snd_ctl_elem_info info; struct snd_card *card; char *elem_data; /* element data */ unsigned long elem_data_size; /* size of element data in bytes */ void *tlv_data; /* TLV data */ unsigned long tlv_data_size; /* TLV data size */ void *priv_data; /* private data (like strings for enumerated type) */ }; // check whether the addition (in bytes) of user ctl element may overflow the limit. static bool check_user_elem_overflow(struct snd_card *card, ssize_t add) { return (ssize_t)card->user_ctl_alloc_size + add > max_user_ctl_alloc_size; } static int snd_ctl_elem_user_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = kcontrol->private_data; unsigned int offset; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); return 0; } static int snd_ctl_elem_user_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = kcontrol->private_data; const char *names; unsigned int item; unsigned int offset; item = uinfo->value.enumerated.item; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); item = min(item, uinfo->value.enumerated.items - 1); uinfo->value.enumerated.item = item; names = ue->priv_data; for (; item > 0; --item) names += strlen(names) + 1; strcpy(uinfo->value.enumerated.name, names); return 0; } static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct user_element *ue = kcontrol->private_data; unsigned int size = ue->elem_data_size; char *src = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; memcpy(&ucontrol->value, src, size); return 0; } static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { int err, change; struct user_element *ue = kcontrol->private_data; unsigned int size = ue->elem_data_size; char *dst = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; err = sanity_check_input_values(ue->card, ucontrol, &ue->info, false); if (err < 0) return err; change = memcmp(&ucontrol->value, dst, size) != 0; if (change) memcpy(dst, &ucontrol->value, size); return change; } /* called in controls_rwsem write lock */ static int replace_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = kctl->private_data; unsigned int *container; unsigned int mask = 0; int i; int change; lockdep_assert_held_write(&ue->card->controls_rwsem); if (size > 1024 * 128) /* sane value */ return -EINVAL; // does the TLV size change cause overflow? if (check_user_elem_overflow(ue->card, (ssize_t)(size - ue->tlv_data_size))) return -ENOMEM; container = vmemdup_user(buf, size); if (IS_ERR(container)) return PTR_ERR(container); change = ue->tlv_data_size != size; if (!change) change = memcmp(ue->tlv_data, container, size) != 0; if (!change) { kvfree(container); return 0; } if (ue->tlv_data == NULL) { /* Now TLV data is available. */ for (i = 0; i < kctl->count; ++i) kctl->vd[i].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; mask = SNDRV_CTL_EVENT_MASK_INFO; } else { ue->card->user_ctl_alloc_size -= ue->tlv_data_size; ue->tlv_data_size = 0; kvfree(ue->tlv_data); } ue->tlv_data = container; ue->tlv_data_size = size; // decremented at private_free. ue->card->user_ctl_alloc_size += size; mask |= SNDRV_CTL_EVENT_MASK_TLV; for (i = 0; i < kctl->count; ++i) snd_ctl_notify_one(ue->card, mask, kctl, i); return change; } static int read_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = kctl->private_data; if (ue->tlv_data_size == 0 || ue->tlv_data == NULL) return -ENXIO; if (size < ue->tlv_data_size) return -ENOSPC; if (copy_to_user(buf, ue->tlv_data, ue->tlv_data_size)) return -EFAULT; return 0; } static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kctl, int op_flag, unsigned int size, unsigned int __user *buf) { if (op_flag == SNDRV_CTL_TLV_OP_WRITE) return replace_user_tlv(kctl, buf, size); else return read_user_tlv(kctl, buf, size); } /* called in controls_rwsem write lock */ static int snd_ctl_elem_init_enum_names(struct user_element *ue) { char *names, *p; size_t buf_len, name_len; unsigned int i; const uintptr_t user_ptrval = ue->info.value.enumerated.names_ptr; lockdep_assert_held_write(&ue->card->controls_rwsem); buf_len = ue->info.value.enumerated.names_length; if (buf_len > 64 * 1024) return -EINVAL; if (check_user_elem_overflow(ue->card, buf_len)) return -ENOMEM; names = vmemdup_user((const void __user *)user_ptrval, buf_len); if (IS_ERR(names)) return PTR_ERR(names); /* check that there are enough valid names */ p = names; for (i = 0; i < ue->info.value.enumerated.items; ++i) { name_len = strnlen(p, buf_len); if (name_len == 0 || name_len >= 64 || name_len == buf_len) { kvfree(names); return -EINVAL; } p += name_len + 1; buf_len -= name_len + 1; } ue->priv_data = names; ue->info.value.enumerated.names_ptr = 0; // increment the allocation size; decremented again at private_free. ue->card->user_ctl_alloc_size += ue->info.value.enumerated.names_length; return 0; } static size_t compute_user_elem_size(size_t size, unsigned int count) { return sizeof(struct user_element) + size * count; } static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol) { struct user_element *ue = kcontrol->private_data; // decrement the allocation size. ue->card->user_ctl_alloc_size -= compute_user_elem_size(ue->elem_data_size, kcontrol->count); ue->card->user_ctl_alloc_size -= ue->tlv_data_size; if (ue->priv_data) ue->card->user_ctl_alloc_size -= ue->info.value.enumerated.names_length; kvfree(ue->tlv_data); kvfree(ue->priv_data); kfree(ue); } static int snd_ctl_elem_add(struct snd_ctl_file *file, struct snd_ctl_elem_info *info, int replace) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; unsigned int count; unsigned int access; long private_size; size_t alloc_size; struct user_element *ue; unsigned int offset; int err; if (!*info->id.name) return -EINVAL; if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name)) return -EINVAL; /* Delete a control to replace them if needed. */ if (replace) { info->id.numid = 0; err = snd_ctl_remove_user_ctl(file, &info->id); if (err) return err; } /* Check the number of elements for this userspace control. */ count = info->owner; if (count == 0) count = 1; if (count > MAX_CONTROL_COUNT) return -EINVAL; /* Arrange access permissions if needed. */ access = info->access; if (access == 0) access = SNDRV_CTL_ELEM_ACCESS_READWRITE; access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE | SNDRV_CTL_ELEM_ACCESS_TLV_WRITE); /* In initial state, nothing is available as TLV container. */ if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; access |= SNDRV_CTL_ELEM_ACCESS_USER; /* * Check information and calculate the size of data specific to * this userspace control. */ /* pass NULL to card for suppressing error messages */ err = snd_ctl_check_elem_info(NULL, info); if (err < 0) return err; /* user-space control doesn't allow zero-size data */ if (info->count < 1) return -EINVAL; private_size = value_sizes[info->type] * info->count; alloc_size = compute_user_elem_size(private_size, count); guard(rwsem_write)(&card->controls_rwsem); if (check_user_elem_overflow(card, alloc_size)) return -ENOMEM; /* * Keep memory object for this userspace control. After passing this * code block, the instance should be freed by snd_ctl_free_one(). * * Note that these elements in this control are locked. */ err = snd_ctl_new(&kctl, count, access, file); if (err < 0) return err; memcpy(&kctl->id, &info->id, sizeof(kctl->id)); ue = kzalloc(alloc_size, GFP_KERNEL); if (!ue) { kfree(kctl); return -ENOMEM; } kctl->private_data = ue; kctl->private_free = snd_ctl_elem_user_free; // increment the allocated size; decremented again at private_free. card->user_ctl_alloc_size += alloc_size; /* Set private data for this userspace control. */ ue->card = card; ue->info = *info; ue->info.access = 0; ue->elem_data = (char *)ue + sizeof(*ue); ue->elem_data_size = private_size; if (ue->info.type == SNDRV_CTL_ELEM_TYPE_ENUMERATED) { err = snd_ctl_elem_init_enum_names(ue); if (err < 0) { snd_ctl_free_one(kctl); return err; } } /* Set callback functions. */ if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED) kctl->info = snd_ctl_elem_user_enum_info; else kctl->info = snd_ctl_elem_user_info; if (access & SNDRV_CTL_ELEM_ACCESS_READ) kctl->get = snd_ctl_elem_user_get; if (access & SNDRV_CTL_ELEM_ACCESS_WRITE) kctl->put = snd_ctl_elem_user_put; if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) kctl->tlv.c = snd_ctl_elem_user_tlv; /* This function manage to free the instance on failure. */ err = __snd_ctl_add_replace(card, kctl, CTL_ADD_EXCLUSIVE); if (err < 0) { snd_ctl_free_one(kctl); return err; } offset = snd_ctl_get_ioff(kctl, &info->id); snd_ctl_build_ioff(&info->id, kctl, offset); /* * Here we cannot fill any field for the number of elements added by * this operation because there're no specific fields. The usage of * 'owner' field for this purpose may cause any bugs to userspace * applications because the field originally means PID of a process * which locks the element. */ return 0; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, struct snd_ctl_elem_info __user *_info, int replace) { struct snd_ctl_elem_info info; int err; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; err = snd_ctl_elem_add(file, &info, replace); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(info))) { snd_ctl_remove_user_ctl(file, &info.id); return -EFAULT; } return 0; } static int snd_ctl_elem_remove(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_ctl_elem_id id; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; return snd_ctl_remove_user_ctl(file, &id); } static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr) { int subscribe; if (get_user(subscribe, ptr)) return -EFAULT; if (subscribe < 0) { subscribe = file->subscribed; if (put_user(subscribe, ptr)) return -EFAULT; return 0; } if (subscribe) { file->subscribed = 1; return 0; } else if (file->subscribed) { snd_ctl_empty_read_queue(file); file->subscribed = 0; } return 0; } static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { static const struct { int op; int perm; } pairs[] = { {SNDRV_CTL_TLV_OP_READ, SNDRV_CTL_ELEM_ACCESS_TLV_READ}, {SNDRV_CTL_TLV_OP_WRITE, SNDRV_CTL_ELEM_ACCESS_TLV_WRITE}, {SNDRV_CTL_TLV_OP_CMD, SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND}, }; struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; int i; /* Check support of the request for this element. */ for (i = 0; i < ARRAY_SIZE(pairs); ++i) { if (op_flag == pairs[i].op && (vd->access & pairs[i].perm)) break; } if (i == ARRAY_SIZE(pairs)) return -ENXIO; if (kctl->tlv.c == NULL) return -ENXIO; /* Write and command operations are not allowed for locked element. */ if (op_flag != SNDRV_CTL_TLV_OP_READ && vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); } static int read_tlv_buf(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; unsigned int len; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ)) return -ENXIO; if (kctl->tlv.p == NULL) return -ENXIO; len = sizeof(unsigned int) * 2 + kctl->tlv.p[1]; if (size < len) return -ENOMEM; if (copy_to_user(buf, kctl->tlv.p, len)) return -EFAULT; return 0; } static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, struct snd_ctl_tlv __user *buf, int op_flag) { struct snd_ctl_tlv header; unsigned int __user *container; unsigned int container_size; struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; struct snd_kcontrol_volatile *vd; lockdep_assert_held(&file->card->controls_rwsem); if (copy_from_user(&header, buf, sizeof(header))) return -EFAULT; /* In design of control core, numerical ID starts at 1. */ if (header.numid == 0) return -EINVAL; /* At least, container should include type and length fields. */ if (header.length < sizeof(unsigned int) * 2) return -EINVAL; container_size = header.length; container = buf->tlv; kctl = snd_ctl_find_numid(file->card, header.numid); if (kctl == NULL) return -ENOENT; /* Calculate index of the element in this set. */ id = kctl->id; snd_ctl_build_ioff(&id, kctl, header.numid - id.numid); vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { return call_tlv_handler(file, op_flag, kctl, &id, container, container_size); } else { if (op_flag == SNDRV_CTL_TLV_OP_READ) { return read_tlv_buf(kctl, &id, container, container_size); } } /* Not supported. */ return -ENXIO; } static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_ctl_file *ctl; struct snd_card *card; struct snd_kctl_ioctl *p; void __user *argp = (void __user *)arg; int __user *ip = argp; int err; ctl = file->private_data; card = ctl->card; if (snd_BUG_ON(!card)) return -ENXIO; switch (cmd) { case SNDRV_CTL_IOCTL_PVERSION: return put_user(SNDRV_CTL_VERSION, ip) ? -EFAULT : 0; case SNDRV_CTL_IOCTL_CARD_INFO: return snd_ctl_card_info(card, ctl, cmd, argp); case SNDRV_CTL_IOCTL_ELEM_LIST: return snd_ctl_elem_list_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_INFO: return snd_ctl_elem_info_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_READ: return snd_ctl_elem_read_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE: return snd_ctl_elem_write_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_LOCK: return snd_ctl_elem_lock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_UNLOCK: return snd_ctl_elem_unlock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_ADD: return snd_ctl_elem_add_user(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE: return snd_ctl_elem_add_user(ctl, argp, 1); case SNDRV_CTL_IOCTL_ELEM_REMOVE: return snd_ctl_elem_remove(ctl, argp); case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS: return snd_ctl_subscribe_events(ctl, ip); case SNDRV_CTL_IOCTL_TLV_READ: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_read, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_READ); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_TLV_WRITE: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_write, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_WRITE); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_TLV_COMMAND: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_write, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_CMD); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_POWER: return -ENOPROTOOPT; case SNDRV_CTL_IOCTL_POWER_STATE: return put_user(SNDRV_CTL_POWER_D0, ip) ? -EFAULT : 0; } guard(rwsem_read)(&snd_ioctl_rwsem); list_for_each_entry(p, &snd_control_ioctls, list) { err = p->fioctl(card, ctl, cmd, arg); if (err != -ENOIOCTLCMD) return err; } dev_dbg(card->dev, "unknown ioctl = 0x%x\n", cmd); return -ENOTTY; } static ssize_t snd_ctl_read(struct file *file, char __user *buffer, size_t count, loff_t * offset) { struct snd_ctl_file *ctl; int err = 0; ssize_t result = 0; ctl = file->private_data; if (snd_BUG_ON(!ctl || !ctl->card)) return -ENXIO; if (!ctl->subscribed) return -EBADFD; if (count < sizeof(struct snd_ctl_event)) return -EINVAL; spin_lock_irq(&ctl->read_lock); while (count >= sizeof(struct snd_ctl_event)) { struct snd_ctl_event ev; struct snd_kctl_event *kev; while (list_empty(&ctl->events)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto __end_lock; } init_waitqueue_entry(&wait, current); add_wait_queue(&ctl->change_sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&ctl->read_lock); schedule(); remove_wait_queue(&ctl->change_sleep, &wait); if (ctl->card->shutdown) return -ENODEV; if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&ctl->read_lock); } kev = snd_kctl_event(ctl->events.next); ev.type = SNDRV_CTL_EVENT_ELEM; ev.data.elem.mask = kev->mask; ev.data.elem.id = kev->id; list_del(&kev->list); spin_unlock_irq(&ctl->read_lock); kfree(kev); if (copy_to_user(buffer, &ev, sizeof(struct snd_ctl_event))) { err = -EFAULT; goto __end; } spin_lock_irq(&ctl->read_lock); buffer += sizeof(struct snd_ctl_event); count -= sizeof(struct snd_ctl_event); result += sizeof(struct snd_ctl_event); } __end_lock: spin_unlock_irq(&ctl->read_lock); __end: return result > 0 ? result : err; } static __poll_t snd_ctl_poll(struct file *file, poll_table * wait) { __poll_t mask; struct snd_ctl_file *ctl; ctl = file->private_data; if (!ctl->subscribed) return 0; poll_wait(file, &ctl->change_sleep, wait); mask = 0; if (!list_empty(&ctl->events)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } /* * register the device-specific control-ioctls. * called from each device manager like pcm.c, hwdep.c, etc. */ static int _snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *pn; pn = kzalloc(sizeof(struct snd_kctl_ioctl), GFP_KERNEL); if (pn == NULL) return -ENOMEM; pn->fioctl = fcn; guard(rwsem_write)(&snd_ioctl_rwsem); list_add_tail(&pn->list, lists); return 0; } /** * snd_ctl_register_ioctl - register the device-specific control-ioctls * @fcn: ioctl callback function * * called from each device manager like pcm.c, hwdep.c, etc. * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_register_ioctl_compat - register the device-specific 32bit compat * control-ioctls * @fcn: ioctl callback function * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl_compat); #endif /* * de-register the device-specific control-ioctls. */ static int _snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *p; if (snd_BUG_ON(!fcn)) return -EINVAL; guard(rwsem_write)(&snd_ioctl_rwsem); list_for_each_entry(p, lists, list) { if (p->fioctl == fcn) { list_del(&p->list); kfree(p); return 0; } } snd_BUG(); return -EINVAL; } /** * snd_ctl_unregister_ioctl - de-register the device-specific control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_unregister_ioctl_compat - de-register the device-specific compat * 32bit control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl_compat); #endif static int snd_ctl_fasync(int fd, struct file * file, int on) { struct snd_ctl_file *ctl; ctl = file->private_data; return snd_fasync_helper(fd, file, on, &ctl->fasync); } /* return the preferred subdevice number if already assigned; * otherwise return -1 */ int snd_ctl_get_preferred_subdevice(struct snd_card *card, int type) { struct snd_ctl_file *kctl; int subdevice = -1; guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->ctl_files, list) { if (kctl->pid == task_pid(current)) { subdevice = kctl->preferred_subdevice[type]; if (subdevice != -1) break; } } return subdevice; } EXPORT_SYMBOL_GPL(snd_ctl_get_preferred_subdevice); /* * ioctl32 compat */ #ifdef CONFIG_COMPAT #include "control_compat.c" #else #define snd_ctl_ioctl_compat NULL #endif /* * control layers (audio LED etc.) */ /** * snd_ctl_request_layer - request to use the layer * @module_name: Name of the kernel module (NULL == build-in) * * Return: zero if successful, or an error code when the module cannot be loaded */ int snd_ctl_request_layer(const char *module_name) { struct snd_ctl_layer_ops *lops; if (module_name == NULL) return 0; scoped_guard(rwsem_read, &snd_ctl_layer_rwsem) { for (lops = snd_ctl_layer; lops; lops = lops->next) if (strcmp(lops->module_name, module_name) == 0) return 0; } return request_module(module_name); } EXPORT_SYMBOL_GPL(snd_ctl_request_layer); /** * snd_ctl_register_layer - register new control layer * @lops: operation structure * * The new layer can track all control elements and do additional * operations on top (like audio LED handling). */ void snd_ctl_register_layer(struct snd_ctl_layer_ops *lops) { struct snd_card *card; int card_number; scoped_guard(rwsem_write, &snd_ctl_layer_rwsem) { lops->next = snd_ctl_layer; snd_ctl_layer = lops; } for (card_number = 0; card_number < SNDRV_CARDS; card_number++) { card = snd_card_ref(card_number); if (card) { scoped_guard(rwsem_read, &card->controls_rwsem) lops->lregister(card); snd_card_unref(card); } } } EXPORT_SYMBOL_GPL(snd_ctl_register_layer); /** * snd_ctl_disconnect_layer - disconnect control layer * @lops: operation structure * * It is expected that the information about tracked cards * is freed before this call (the disconnect callback is * not called here). */ void snd_ctl_disconnect_layer(struct snd_ctl_layer_ops *lops) { struct snd_ctl_layer_ops *lops2, *prev_lops2; guard(rwsem_write)(&snd_ctl_layer_rwsem); for (lops2 = snd_ctl_layer, prev_lops2 = NULL; lops2; lops2 = lops2->next) { if (lops2 == lops) { if (!prev_lops2) snd_ctl_layer = lops->next; else prev_lops2->next = lops->next; break; } prev_lops2 = lops2; } } EXPORT_SYMBOL_GPL(snd_ctl_disconnect_layer); /* * INIT PART */ static const struct file_operations snd_ctl_f_ops = { .owner = THIS_MODULE, .read = snd_ctl_read, .open = snd_ctl_open, .release = snd_ctl_release, .poll = snd_ctl_poll, .unlocked_ioctl = snd_ctl_ioctl, .compat_ioctl = snd_ctl_ioctl_compat, .fasync = snd_ctl_fasync, }; /* call lops under rwsems; called from snd_ctl_dev_*() below() */ #define call_snd_ctl_lops(_card, _op) \ do { \ struct snd_ctl_layer_ops *lops; \ guard(rwsem_read)(&(_card)->controls_rwsem); \ guard(rwsem_read)(&snd_ctl_layer_rwsem); \ for (lops = snd_ctl_layer; lops; lops = lops->next) \ lops->_op(_card); \ } while (0) /* * registration of the control device */ static int snd_ctl_dev_register(struct snd_device *device) { struct snd_card *card = device->device_data; int err; err = snd_register_device(SNDRV_DEVICE_TYPE_CONTROL, card, -1, &snd_ctl_f_ops, card, card->ctl_dev); if (err < 0) return err; call_snd_ctl_lops(card, lregister); return 0; } /* * disconnection of the control device */ static int snd_ctl_dev_disconnect(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_ctl_file *ctl; scoped_guard(read_lock_irqsave, &card->controls_rwlock) { list_for_each_entry(ctl, &card->ctl_files, list) { wake_up(&ctl->change_sleep); snd_kill_fasync(ctl->fasync, SIGIO, POLL_ERR); } } call_snd_ctl_lops(card, ldisconnect); return snd_unregister_device(card->ctl_dev); } /* * free all controls */ static int snd_ctl_dev_free(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_kcontrol *control; scoped_guard(rwsem_write, &card->controls_rwsem) { while (!list_empty(&card->controls)) { control = snd_kcontrol(card->controls.next); __snd_ctl_remove(card, control, false); } #ifdef CONFIG_SND_CTL_FAST_LOOKUP xa_destroy(&card->ctl_numids); xa_destroy(&card->ctl_hash); #endif } put_device(card->ctl_dev); return 0; } /* * create control core: * called from init.c */ int snd_ctl_create(struct snd_card *card) { static const struct snd_device_ops ops = { .dev_free = snd_ctl_dev_free, .dev_register = snd_ctl_dev_register, .dev_disconnect = snd_ctl_dev_disconnect, }; int err; if (snd_BUG_ON(!card)) return -ENXIO; if (snd_BUG_ON(card->number < 0 || card->number >= SNDRV_CARDS)) return -ENXIO; err = snd_device_alloc(&card->ctl_dev, card); if (err < 0) return err; dev_set_name(card->ctl_dev, "controlC%d", card->number); err = snd_device_new(card, SNDRV_DEV_CONTROL, card, &ops); if (err < 0) put_device(card->ctl_dev); return err; } /* * Frequently used control callbacks/helpers */ /** * snd_ctl_boolean_mono_info - Helper function for a standard boolean info * callback with a mono channel * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with a single mono channel. * * Return: Zero (always successful) */ int snd_ctl_boolean_mono_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_mono_info); /** * snd_ctl_boolean_stereo_info - Helper function for a standard boolean info * callback with stereo two channels * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with stereo two channels. * * Return: Zero (always successful) */ int snd_ctl_boolean_stereo_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 2; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_stereo_info); /** * snd_ctl_enum_info - fills the info structure for an enumerated control * @info: the structure to be filled * @channels: the number of the control's channels; often one * @items: the number of control values; also the size of @names * @names: an array containing the names of all control values * * Sets all required fields in @info to their appropriate values. * If the control's accessibility is not the default (readable and writable), * the caller has to fill @info->access. * * Return: Zero (always successful) */ int snd_ctl_enum_info(struct snd_ctl_elem_info *info, unsigned int channels, unsigned int items, const char *const names[]) { info->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; info->count = channels; info->value.enumerated.items = items; if (!items) return 0; if (info->value.enumerated.item >= items) info->value.enumerated.item = items - 1; WARN(strlen(names[info->value.enumerated.item]) >= sizeof(info->value.enumerated.name), "ALSA: too long item name '%s'\n", names[info->value.enumerated.item]); strscpy(info->value.enumerated.name, names[info->value.enumerated.item], sizeof(info->value.enumerated.name)); return 0; } EXPORT_SYMBOL(snd_ctl_enum_info);
5 5 5 11 14 14 13 10 8 5 4 4 6 5 5 5 5 2 2 2 2 2 2 50 33 51 33 41 9 41 9 9 33 33 24 33 16 17 2 15 2 2 17 4 51 34 51 2 1 2 3 2 3 3 3 3 3 1 2 1 1 1 5 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2016 Facebook */ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> #include <linux/kernel.h> #include <linux/stacktrace.h> #include <linux/perf_event.h> #include <linux/btf_ids.h> #include <linux/buildid.h> #include "percpu_freelist.h" #include "mmap_unlock_work.h" #define STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ BPF_F_STACK_BUILD_ID) struct stack_map_bucket { struct pcpu_freelist_node fnode; u32 hash; u32 nr; u64 data[]; }; struct bpf_stack_map { struct bpf_map map; void *elems; struct pcpu_freelist freelist; u32 n_buckets; struct stack_map_bucket *buckets[] __counted_by(n_buckets); }; static inline bool stack_map_use_build_id(struct bpf_map *map) { return (map->map_flags & BPF_F_STACK_BUILD_ID); } static inline int stack_map_data_size(struct bpf_map *map) { return stack_map_use_build_id(map) ? sizeof(struct bpf_stack_build_id) : sizeof(u64); } static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { u64 elem_size = sizeof(struct stack_map_bucket) + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, smap->map.numa_node); if (!smap->elems) return -ENOMEM; err = pcpu_freelist_init(&smap->freelist); if (err) goto free_elems; pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, smap->map.max_entries); return 0; free_elems: bpf_map_area_free(smap->elems); return err; } /* Called from syscall */ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_stack_map *smap; u64 cost, n_buckets; int err; if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || value_size < 8 || value_size % 8) return ERR_PTR(-EINVAL); BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); if (attr->map_flags & BPF_F_STACK_BUILD_ID) { if (value_size % sizeof(struct bpf_stack_build_id) || value_size / sizeof(struct bpf_stack_build_id) > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); /* hash table size must be power of 2; roundup_pow_of_two() can overflow * into UB on 32-bit arches, so check that first */ if (attr->max_entries > 1UL << 31) return ERR_PTR(-E2BIG); n_buckets = roundup_pow_of_two(attr->max_entries); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); smap->n_buckets = n_buckets; err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) goto free_smap; err = prealloc_elems_and_freelist(smap); if (err) goto put_buffers; return &smap->map; put_buffers: put_callchain_buffers(); free_smap: bpf_map_area_free(smap); return ERR_PTR(err); } static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault) { return may_fault ? build_id_parse(vma, build_id, NULL) : build_id_parse_nofault(vma, build_id, NULL); } /* * Expects all id_offs[i].ip values to be set to correct initial IPs. * They will be subsequently: * - either adjusted in place to a file offset, if build ID fetching * succeeds; in this case id_offs[i].build_id is set to correct build ID, * and id_offs[i].status is set to BPF_STACK_BUILD_ID_VALID; * - or IP will be kept intact, if build ID fetching failed; in this case * id_offs[i].build_id is zeroed out and id_offs[i].status is set to * BPF_STACK_BUILD_ID_IP. */ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u32 trace_nr, bool user, bool may_fault) { int i; struct mmap_unlock_irq_work *work = NULL; bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); struct vm_area_struct *vma, *prev_vma = NULL; const char *prev_build_id; /* If the irq_work is in use, fall back to report ips. Same * fallback is used for kernel stack (!user) on a stackmap with * build_id. */ if (!user || !current || !current->mm || irq_work_busy || !mmap_read_trylock(current->mm)) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); } return; } for (i = 0; i < trace_nr; i++) { u64 ip = READ_ONCE(id_offs[i].ip); if (range_in_vma(prev_vma, ip, ip)) { vma = prev_vma; memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX); goto build_id_valid; } vma = find_vma(current->mm, ip); if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) { /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); continue; } build_id_valid: id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start; id_offs[i].status = BPF_STACK_BUILD_ID_VALID; prev_vma = vma; prev_build_id = id_offs[i].build_id; } bpf_mmap_unlock_mm(work, current->mm); } static struct perf_callchain_entry * get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) { #ifdef CONFIG_STACKTRACE struct perf_callchain_entry *entry; int rctx; entry = get_callchain_entry(&rctx); if (!entry) return NULL; entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, max_depth, 0); /* stack_trace_save_tsk() works on unsigned long array, while * perf_callchain_entry uses u64 array. For 32-bit systems, it is * necessary to fix this mismatch. */ if (__BITS_PER_LONG != 64) { unsigned long *from = (unsigned long *) entry->ip; u64 *to = entry->ip; int i; /* copy data from the end to avoid using extra buffer */ for (i = entry->nr - 1; i >= 0; i--) to[i] = (u64)(from[i]); } put_callchain_entry(rctx); return entry; #else /* CONFIG_STACKTRACE */ return NULL; #endif } static long __bpf_get_stackid(struct bpf_map *map, struct perf_callchain_entry *trace, u64 flags) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *new_bucket, *old_bucket; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len, i; bool user = flags & BPF_F_USER_STACK; u64 *ips; bool hash_matches; if (trace->nr <= skip) /* skipping more than usable stack trace */ return -EFAULT; trace_nr = trace->nr - skip; trace_len = trace_nr * sizeof(u64); ips = trace->ip + skip; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); bucket = READ_ONCE(smap->buckets[id]); hash_matches = bucket && bucket->hash == hash; /* fast cmp */ if (hash_matches && flags & BPF_F_FAST_STACK_CMP) return id; if (stack_map_use_build_id(map)) { struct bpf_stack_build_id *id_offs; /* for build_id+offset, pop a bucket before slow cmp */ new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; new_bucket->nr = trace_nr; id_offs = (struct bpf_stack_build_id *)new_bucket->data; for (i = 0; i < trace_nr; i++) id_offs[i].ip = ips[i]; stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */); trace_len = trace_nr * sizeof(struct bpf_stack_build_id); if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, new_bucket->data, trace_len) == 0) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return id; } if (bucket && !(flags & BPF_F_REUSE_STACKID)) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return -EEXIST; } } else { if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, ips, trace_len) == 0) return id; if (bucket && !(flags & BPF_F_REUSE_STACKID)) return -EEXIST; new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; memcpy(new_bucket->data, ips, trace_len); } new_bucket->hash = hash; new_bucket->nr = trace_nr; old_bucket = xchg(&smap->buckets[id], new_bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return id; } BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u64, flags) { u32 max_depth = map->value_size / stack_map_data_size(map); u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; max_depth += skip; if (max_depth > sysctl_perf_event_max_stack) max_depth = sysctl_perf_event_max_stack; trace = get_perf_callchain(regs, 0, kernel, user, max_depth, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ return -EFAULT; return __bpf_get_stackid(map, trace, flags); } const struct bpf_func_proto bpf_get_stackid_proto = { .func = bpf_get_stackid, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; static __u64 count_kernel_ip(struct perf_callchain_entry *trace) { __u64 nr_kernel = 0; while (nr_kernel < trace->nr) { if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) break; nr_kernel++; } return nr_kernel; } BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, struct bpf_map *, map, u64, flags) { struct perf_event *event = ctx->event; struct perf_callchain_entry *trace; bool kernel, user; __u64 nr_kernel; int ret; /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) return bpf_get_stackid((unsigned long)(ctx->regs), (unsigned long) map, flags, 0, 0); if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; user = flags & BPF_F_USER_STACK; kernel = !user; trace = ctx->data->callchain; if (unlikely(!trace)) return -EFAULT; nr_kernel = count_kernel_ip(trace); if (kernel) { __u64 nr = trace->nr; trace->nr = nr_kernel; ret = __bpf_get_stackid(map, trace, flags); /* restore nr */ trace->nr = nr; } else { /* user */ u64 skip = flags & BPF_F_SKIP_FIELD_MASK; skip += nr_kernel; if (skip > BPF_F_SKIP_FIELD_MASK) return -EFAULT; flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; ret = __bpf_get_stackid(map, trace, flags); } return ret; } const struct bpf_func_proto bpf_get_stackid_proto_pe = { .func = bpf_get_stackid_pe, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, struct perf_callchain_entry *trace_in, void *buf, u32 size, u64 flags, bool may_fault) { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; int err = -EINVAL; u64 *ips; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))) goto clear; if (kernel && user_build_id) goto clear; elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64); if (unlikely(size % elem_size)) goto clear; /* cannot get valid user stack for task without user_mode regs */ if (task && user && !user_mode(regs)) goto err_fault; /* get_perf_callchain does not support crosstask user stack walking * but returns an empty stack instead of NULL. */ if (crosstask && user) { err = -EOPNOTSUPP; goto clear; } num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) max_depth = sysctl_perf_event_max_stack; if (may_fault) rcu_read_lock(); /* need RCU for perf's callchain below */ if (trace_in) trace = trace_in; else if (kernel && task) trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, crosstask, false); if (unlikely(!trace) || trace->nr < skip) { if (may_fault) rcu_read_unlock(); goto err_fault; } trace_nr = trace->nr - skip; trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; copy_len = trace_nr * elem_size; ips = trace->ip + skip; if (user_build_id) { struct bpf_stack_build_id *id_offs = buf; u32 i; for (i = 0; i < trace_nr; i++) id_offs[i].ip = ips[i]; } else { memcpy(buf, ips, copy_len); } /* trace/ips should not be dereferenced after this point */ if (may_fault) rcu_read_unlock(); if (user_build_id) stack_map_get_build_id_offset(buf, trace_nr, user, may_fault); if (size > copy_len) memset(buf + copy_len, 0, size - copy_len); return copy_len; err_fault: err = -EFAULT; clear: memset(buf, 0, size); return err; } BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, u64, flags) { return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */); } const struct bpf_func_proto bpf_get_stack_proto = { .func = bpf_get_stack, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size, u64, flags) { return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */); } const struct bpf_func_proto bpf_get_stack_sleepable_proto = { .func = bpf_get_stack_sleepable, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags, bool may_fault) { struct pt_regs *regs; long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); if (regs) res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault); put_task_stack(task); return res; } BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */); } const struct bpf_func_proto bpf_get_task_stack_proto = { .func = bpf_get_task_stack, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf, u32, size, u64, flags) { return __bpf_get_task_stack(task, buf, size, flags, true /* !may_fault */); } const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = { .func = bpf_get_task_stack_sleepable, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, void *, buf, u32, size, u64, flags) { struct pt_regs *regs = (struct pt_regs *)(ctx->regs); struct perf_event *event = ctx->event; struct perf_callchain_entry *trace; bool kernel, user; int err = -EINVAL; __u64 nr_kernel; if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */); if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))) goto clear; user = flags & BPF_F_USER_STACK; kernel = !user; err = -EFAULT; trace = ctx->data->callchain; if (unlikely(!trace)) goto clear; nr_kernel = count_kernel_ip(trace); if (kernel) { __u64 nr = trace->nr; trace->nr = nr_kernel; err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */); /* restore nr */ trace->nr = nr; } else { /* user */ u64 skip = flags & BPF_F_SKIP_FIELD_MASK; skip += nr_kernel; if (skip > BPF_F_SKIP_FIELD_MASK) goto clear; flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */); } return err; clear: memset(buf, 0, size); return err; } const struct bpf_func_proto bpf_get_stack_proto_pe = { .func = bpf_get_stack_pe, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; /* Called from eBPF program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } /* Called from syscall */ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *old_bucket; u32 id = *(u32 *)key, trace_len; if (unlikely(id >= smap->n_buckets)) return -ENOENT; bucket = xchg(&smap->buckets[id], NULL); if (!bucket) return -ENOENT; trace_len = bucket->nr * stack_map_data_size(map); memcpy(value, bucket->data, trace_len); memset(value + trace_len, 0, map->value_size - trace_len); old_bucket = xchg(&smap->buckets[id], bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); u32 id; WARN_ON_ONCE(!rcu_read_lock_held()); if (!key) { id = 0; } else { id = *(u32 *)key; if (id >= smap->n_buckets || !smap->buckets[id]) id = 0; else id++; } while (id < smap->n_buckets && !smap->buckets[id]) id++; if (id >= smap->n_buckets) return -ENOENT; *(u32 *)next_key = id; return 0; } static long stack_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ static long stack_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *old_bucket; u32 id = *(u32 *)key; if (unlikely(id >= smap->n_buckets)) return -E2BIG; old_bucket = xchg(&smap->buckets[id], NULL); if (old_bucket) { pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } else { return -ENOENT; } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void stack_map_free(struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); bpf_map_area_free(smap); put_callchain_buffers(); } static u64 stack_map_mem_usage(const struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); u64 value_size = map->value_size; u64 n_buckets = smap->n_buckets; u64 enties = map->max_entries; u64 usage = sizeof(*smap); usage += n_buckets * sizeof(struct stack_map_bucket *); usage += enties * (sizeof(struct stack_map_bucket) + value_size); return usage; } BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map) const struct bpf_map_ops stack_trace_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = stack_map_alloc, .map_free = stack_map_free, .map_get_next_key = stack_map_get_next_key, .map_lookup_elem = stack_map_lookup_elem, .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, .map_mem_usage = stack_map_mem_usage, .map_btf_id = &stack_trace_map_btf_ids[0], };
676 1129 1125 1126 254 160 239 228 162 152 161 157 1125 158 124 96 131 1129 1097 1094 323 324 103 325 323 315 315 312 314 246 324 325 323 325 246 229 322 191 338 338 136 51 111 104 104 104 78 76 11 76 76 105 102 79 90 8 103 98 97 98 605 590 592 591 605 1016 1098 589 1003 1004 1003 676 674 674 676 3 3 3 31 31 31 53 40 52 89 89 89 89 89 77 77 77 77 77 66 66 66 248 1004 2 2 2 508 504 507 507 1 306 303 305 306 1 89 88 88 88 89 77 78 77 77 78 66 65 66 248 1001 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/core/dev_addr_lists.c - Functions for handling net device lists * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> * * This file contains functions for working with unicast, multicast and device * addresses lists. */ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> #include "dev.h" /* * General list handling functions */ static int __hw_addr_insert(struct netdev_hw_addr_list *list, struct netdev_hw_addr *new, int addr_len) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(new->addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&new->type, &ha->type, sizeof(new->type)); parent = *ins_point; if (diff < 0) ins_point = &parent->rb_left; else if (diff > 0) ins_point = &parent->rb_right; else return -EEXIST; } rb_link_node_rcu(&new->node, parent, ins_point); rb_insert_color(&new->node, &list->tree); return 0; } static struct netdev_hw_addr* __hw_addr_create(const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; int alloc_size; alloc_size = sizeof(*ha); if (alloc_size < L1_CACHE_BYTES) alloc_size = L1_CACHE_BYTES; ha = kmalloc(alloc_size, GFP_ATOMIC); if (!ha) return NULL; memcpy(ha->addr, addr, addr_len); ha->type = addr_type; ha->refcount = 1; ha->global_use = global; ha->synced = sync ? 1 : 0; ha->sync_cnt = 0; return ha; } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync, int sync_count, bool exclusive) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; if (addr_len > MAX_ADDR_LEN) return -EINVAL; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); parent = *ins_point; if (diff < 0) { ins_point = &parent->rb_left; } else if (diff > 0) { ins_point = &parent->rb_right; } else { if (exclusive) return -EEXIST; if (global) { /* check if addr is already used as global */ if (ha->global_use) return 0; else ha->global_use = true; } if (sync) { if (ha->synced && sync_count) return -EEXIST; else ha->synced++; } ha->refcount++; return 0; } } ha = __hw_addr_create(addr, addr_len, addr_type, global, sync); if (!ha) return -ENOMEM; rb_link_node(&ha->node, parent, ins_point); rb_insert_color(&ha->node, &list->tree); list_add_tail_rcu(&ha->list, &list->list); list->count++; return 0; } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, 0, false); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, struct netdev_hw_addr *ha, bool global, bool sync) { if (global && !ha->global_use) return -ENOENT; if (sync && !ha->synced) return -ENOENT; if (global) ha->global_use = false; if (sync) ha->synced--; if (--ha->refcount) return 0; rb_erase(&ha->node, &list->tree); list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); list->count--; return 0; } static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { struct rb_node *node; node = list->tree.rb_node; while (node) { struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node); int diff = memcmp(addr, ha->addr, addr_len); if (diff == 0 && addr_type) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); if (diff < 0) node = node->rb_left; else if (diff > 0) node = node->rb_right; else return ha; } return NULL; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type); if (!ha) return -ENOENT; return __hw_addr_del_entry(list, ha, global, sync); } static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); } static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true, ha->sync_cnt, false); if (err && err != -EEXIST) return err; if (!err) { ha->sync_cnt++; ha->refcount++; } return 0; } static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, false, true); if (err) return; ha->sync_cnt--; /* address on from list is not marked synced */ __hw_addr_del_entry(from_list, ha, false, false); } int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt == ha->refcount) { __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } else { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } } return err; } EXPORT_SYMBOL(__hw_addr_sync_multiple); /* This function only works where there is a strict 1-1 relationship * between source and destination of they synch. If you ever need to * sync addresses to more then 1 destination, you need to use * __hw_addr_sync_multiple(). */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->sync_cnt) { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } else if (ha->refcount == 1) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } EXPORT_SYMBOL(__hw_addr_sync); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); /** * __hw_addr_sync_dev - Synchronize device's multicast list * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without * any notification to the device. **/ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; int err; /* first go through and flush out any stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt || ha->refcount != 1) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (ha->sync_cnt) continue; err = sync(dev, ha->addr); if (err) return err; ha->sync_cnt++; ha->refcount++; } return 0; } EXPORT_SYMBOL(__hw_addr_sync_dev); /** * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking * into account references * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address or reference on it should be added * @unsync: function to call if address or some reference on it should removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address or references on it * add/remove notifications. The unsync function may be NULL in which case * the addresses or references on it requiring removal will simply be * removed without any notification to the device. That is responsibility of * the driver to identify and distribute address or references on it between * internal address tables. **/ int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; int err, ref_cnt; /* first go through and flush out any unsynced/stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address is not used */ if ((ha->sync_cnt << 1) <= ha->refcount) continue; /* if fails defer unsyncing address */ ref_cnt = ha->refcount - ha->sync_cnt; if (unsync && unsync(dev, ha->addr, ref_cnt)) continue; ha->refcount = (ref_cnt << 1) + 1; ha->sync_cnt = ref_cnt; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync updated/new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address added or reused */ if ((ha->sync_cnt << 1) >= ha->refcount) continue; ref_cnt = ha->refcount - ha->sync_cnt; err = sync(dev, ha->addr, ref_cnt); if (err) return err; ha->refcount = ref_cnt << 1; ha->sync_cnt = ref_cnt; } return 0; } EXPORT_SYMBOL(__hw_addr_ref_sync_dev); /** * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on * it from device * @list: address list to remove synchronized addresses (references on it) from * @dev: device to sync * @unsync: function to call if address and references on it should be removed * * Remove all addresses that were added to the device by * __hw_addr_ref_sync_dev(). This function is intended to be called from the * ndo_stop or ndo_open functions on devices that require explicit address (or * references on it) add/remove notifications. If the unsync function pointer * is NULL then this function can be used to just reset the sync_cnt for the * addresses in the list. **/ void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr, ha->sync_cnt)) continue; ha->refcount -= ha->sync_cnt - 1; ha->sync_cnt = 0; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_ref_unsync_dev); /** * __hw_addr_unsync_dev - Remove synchronized addresses from device * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by __hw_addr_sync_dev(). * This function is intended to be called from the ndo_stop or ndo_open * functions on devices that require explicit address add/remove * notifications. If the unsync function pointer is NULL then this function * can be used to just reset the sync_cnt for the addresses in the list. **/ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_unsync_dev); static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; list->tree = RB_ROOT; list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); } list->count = 0; } void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; list->tree = RB_ROOT; } EXPORT_SYMBOL(__hw_addr_init); /* * Device addresses handling functions */ /* Check that netdev->dev_addr is not written to directly as this would * break the rbtree layout. All changes should go thru dev_addr_set() and co. * Remove this check in mid-2024. */ void dev_addr_check(struct net_device *dev) { if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN)) return; netdev_warn(dev, "Current addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr); netdev_warn(dev, "Expected addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr_shadow); netdev_WARN(dev, "Incorrect netdev->dev_addr\n"); } /** * dev_addr_flush - Flush device address list * @dev: device * * Flush device address list and reset ->dev_addr. * * The caller must hold the rtnl_mutex. */ void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ dev_addr_check(dev); __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } /** * dev_addr_init - Init device address list * @dev: device * * Init device address list and create the first element, * used by ->dev_addr. * * The caller must hold the rtnl_mutex. */ int dev_addr_init(struct net_device *dev) { unsigned char addr[MAX_ADDR_LEN]; struct netdev_hw_addr *ha; int err; /* rtnl_mutex must be held here */ __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } return err; } void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len) { struct netdev_hw_addr *ha; dev_addr_check(dev); ha = container_of(dev->dev_addr, struct netdev_hw_addr, addr[0]); rb_erase(&ha->node, &dev->dev_addrs.tree); memcpy(&ha->addr[offset], addr, len); memcpy(&dev->dev_addr_shadow[offset], addr, len); WARN_ON(__hw_addr_insert(&dev->dev_addrs, ha, dev->addr_len)); } EXPORT_SYMBOL(dev_addr_mod); /** * dev_addr_add - Add a device address * @dev: device * @addr: address to add * @addr_type: address type * * Add a device address to the device or increase the reference count if * it already exists. * * The caller must hold the rtnl_mutex. */ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; ASSERT_RTNL(); err = dev_pre_changeaddr_notify(dev, addr, NULL); if (err) return err; err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_add); /** * dev_addr_del - Release a device address. * @dev: device * @addr: address to delete * @addr_type: address type * * Release reference to a device address and remove it from the device * if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; struct netdev_hw_addr *ha; ASSERT_RTNL(); /* * We can not remove the first address from the list because * dev->dev_addr points to that. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); if (!memcmp(ha->addr, addr, dev->addr_len) && ha->type == addr_type && ha->refcount == 1) return -ENOENT; err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_del); /* * Unicast list handling functions */ /** * dev_uc_add_excl - Add a global secondary unicast address * @dev: device * @addr: address to add */ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add_excl); /** * dev_uc_add - Add a secondary unicast address * @dev: device * @addr: address to add * * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add); /** * dev_uc_del - Release secondary unicast address. * @dev: device * @addr: address to delete * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_del); /** * dev_uc_sync - Synchronize device's unicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. This function assumes that * addresses will only ever be synced to the @to devices and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync); /** * dev_uc_sync_multiple - Synchronize device's unicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have been deleted from the source. The source device * must be locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. It allows for a single source * device to be synced to multiple destination devices. */ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync_multiple); /** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_uc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_uc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two * reasons: * 1) This is always called without any addr_list_lock, so as the * outermost one here, it must be 0. * 2) This is called by some callers after unlinking the upper device, * so the dev->lower_level becomes 1 again. * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or * larger. */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_uc_unsync); /** * dev_uc_flush - Flush unicast addresses * @dev: device * * Flush unicast addresses. */ void dev_uc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_uc_flush); /** * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. */ void dev_uc_init(struct net_device *dev) { __hw_addr_init(&dev->uc); } EXPORT_SYMBOL(dev_uc_init); /* * Multicast list handling functions */ /** * dev_mc_add_excl - Add a global secondary multicast address * @dev: device * @addr: address to add */ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_mc_add_excl); static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false, 0, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_add - Add a multicast address * @dev: device * @addr: address to add * * Add a multicast address to the device or increase * the reference count if it already exists. */ int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } EXPORT_SYMBOL(dev_mc_add); /** * dev_mc_add_global - Add a global multicast address * @dev: device * @addr: address to add * * Add a global multicast address to the device. */ int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_del - Delete a multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } EXPORT_SYMBOL(dev_mc_del); /** * dev_mc_del_global - Delete a global multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } EXPORT_SYMBOL(dev_mc_del_global); /** * dev_mc_sync - Synchronize device's multicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync); /** * dev_mc_sync_multiple - Synchronize device's multicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. It allows for a single * source device to be synced to multiple destination devices. */ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync_multiple); /** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_mc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); /** * dev_mc_flush - Flush multicast addresses * @dev: device * * Flush multicast addresses. */ void dev_mc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->mc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_mc_flush); /** * dev_mc_init - Init multicast address list * @dev: device * * Init multicast address list. */ void dev_mc_init(struct net_device *dev) { __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init);
4 4 9487 224 9487 1 599 9440 4999 11 110 110 106 2 106 7 110 32317 32993 35172 35151 33142 32969 3 3 35283 3 3 6632 32317 33152 33147 33142 9 33201 33151 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMZONE_H #define _LINUX_MMZONE_H #ifndef __ASSEMBLY__ #ifndef __GENERATING_BOUNDS_H #include <linux/spinlock.h> #include <linux/list.h> #include <linux/list_nulls.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/numa.h> #include <linux/init.h> #include <linux/seqlock.h> #include <linux/nodemask.h> #include <linux/pageblock-flags.h> #include <linux/page-flags-layout.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/local_lock.h> #include <linux/zswap.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_PAGE_ORDER 10 #else #define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should * coalesce naturally under reasonable reclaim pressure and those which * will not. */ #define PAGE_ALLOC_COSTLY_ORDER 3 enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way * ZONE_MOVABLE works. Only movable pages can be allocated * from MIGRATE_CMA pageblocks and page allocator never * implicitly change migration type of MIGRATE_CMA pageblock. * * The way to use it is to change migratetype of a range of * pageblocks to MIGRATE_CMA which can be done by * __free_pageblock_cma() function. */ MIGRATE_CMA, #endif #ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ #endif MIGRATE_TYPES }; /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) # define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false # define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) { return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; } /* * Check whether a migratetype can be merged with another migratetype. * * It is only mergeable when it can fall back to other migratetypes for * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. */ static inline bool migratetype_is_mergeable(int mt) { return mt < MIGRATE_PCPTYPES; } #define for_each_migratetype_order(order, type) \ for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) #define get_pageblock_migratetype(page) \ get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) #define folio_migratetype(folio) \ get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ MIGRATETYPE_MASK) struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; }; struct pglist_data; #ifdef CONFIG_NUMA enum numa_stat_item { NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ NUMA_FOREIGN, /* was intended here, hit elsewhere */ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ NR_VM_NUMA_EVENT_ITEMS }; #else #define NR_VM_NUMA_EVENT_ITEMS 0 #endif enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif NR_FREE_CMA_PAGES, #ifdef CONFIG_UNACCEPTED_MEMORY NR_UNACCEPTED, #endif NR_VM_ZONE_STAT_ITEMS }; enum node_stat_item { NR_LRU_BASE, NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_FILE, WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_FILE, WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, NR_FILE_THPS, NR_FILE_PMDMAPPED, NR_ANON_THPS, NR_VMSCAN_WRITE, NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ NR_KERNEL_STACK_KB, /* measured in KiB */ #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ NR_SECONDARY_PAGETABLE, /* secondary pagetables, KVM & IOMMU */ #ifdef CONFIG_IOMMU_SUPPORT NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */ #endif #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif NR_VM_NODE_STAT_ITEMS }; /* * Returns true if the item should be printed in THPs (/proc/vmstat * currently prints number of anon, file and shmem THPs. But the item * is charged in pages). */ static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return false; return item == NR_ANON_THPS || item == NR_FILE_THPS || item == NR_SHMEM_THPS || item == NR_SHMEM_PMDMAPPED || item == NR_FILE_PMDMAPPED; } /* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). This defines the API part, the internal representation * might be different. */ static __always_inline bool vmstat_item_in_bytes(int idx) { /* * Global and per-node slab counters track slab pages. * It's expected that changes are multiples of PAGE_SIZE. * Internally values are stored in pages. * * Per-memcg and per-lruvec counters track memory, consumed * by individual slab objects. These counters are actually * byte-precise. */ return (idx == NR_SLAB_RECLAIMABLE_B || idx == NR_SLAB_UNRECLAIMABLE_B); } /* * We do arithmetic on the LRU lists in various places in the code, * so it is important to keep the active lists LRU_ACTIVE higher in * the array than the corresponding inactive lists, and to keep * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. * * This has to be kept in sync with the statistics in zone_stat_item * above and the descriptions in vmstat_text in mm/vmstat.c */ #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, LRU_UNEVICTABLE, NR_LRU_LISTS }; enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) static inline bool is_file_lru(enum lru_list lru) { return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); } static inline bool is_active_lru(enum lru_list lru) { return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } #define WORKINGSET_ANON 0 #define WORKINGSET_FILE 1 #define ANON_AND_FILE 2 enum lruvec_flags { /* * An lruvec has many dirty pages backed by a congested BDI: * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim. * It can be cleared by cgroup reclaim or kswapd. * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim. * It can only be cleared by kswapd. * * Essentially, kswapd can unthrottle an lruvec throttled by cgroup * reclaim, but not vice versa. This only applies to the root cgroup. * The goal is to prevent cgroup reclaim on the root cgroup (e.g. * memory.reclaim) to unthrottle an unbalanced node (that was throttled * by kswapd). */ LRUVEC_CGROUP_CONGESTED, LRUVEC_NODE_CONGESTED, }; #endif /* !__GENERATING_BOUNDS_H */ /* * Evictable folios are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while * a folio is on one of lrugen->folios[]. Otherwise it stores 0. * * After a folio is faulted in, the aging needs to check the accessed bit at * least twice before handing this folio over to the eviction. The first check * clears the accessed bit from the initial fault; the second check makes sure * this folio hasn't been used since then. This process, AKA second chance, * requires a minimum of two generations, hence MIN_NR_GENS. And to maintain ABI * compatibility with the active/inactive LRU, e.g., /proc/vmstat, these two * generations are considered active; the rest of generations, if they exist, * are considered inactive. See lru_gen_is_active(). * * PG_active is always cleared while a folio is on one of lrugen->folios[] so * that the sliding window needs not to worry about it. And it's set again when * a folio considered active is isolated for non-reclaiming purposes, e.g., * migration. See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits * in folio->flags, masked by LRU_GEN_MASK. */ #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U /* * Each generation is divided into multiple tiers. A folio accessed N times * through file descriptors is in tier order_base_2(N). A folio in the first * tier (N=0,1) is marked by PG_referenced unless it was faulted in through page * tables or read ahead. A folio in the last tier (MAX_NR_TIERS-1) is marked by * PG_workingset. A folio in any other tier (1<N<5) between the first and last * is marked by additional bits of LRU_REFS_WIDTH in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, * comparisons of refaulted/(evicted+protected) from the first tier and the rest * infer whether folios accessed multiple times through file descriptors are * statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in * folio->flags, masked by LRU_REFS_MASK. */ #define MAX_NR_TIERS 4U #ifndef __GENERATING_BOUNDS_H #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) /* * For folios accessed multiple times through file descriptors, * lru_gen_inc_refs() sets additional bits of LRU_REFS_WIDTH in folio->flags * after PG_referenced, then PG_workingset after LRU_REFS_WIDTH. After all its * bits are set, i.e., LRU_REFS_FLAGS|BIT(PG_workingset), a folio is lazily * promoted into the second oldest generation in the eviction path. And when * folio_inc_gen() does that, it clears LRU_REFS_FLAGS so that * lru_gen_inc_refs() can start over. Note that for this case, LRU_REFS_MASK is * only valid when PG_referenced is set. * * For folios accessed multiple times through page tables, folio_update_gen() * from a page table walk or lru_gen_set_refs() from a rmap walk sets * PG_referenced after the accessed bit is cleared for the first time. * Thereafter, those two paths set PG_workingset and promote folios to the * youngest generation. Like folio_inc_gen(), folio_update_gen() also clears * PG_referenced. Note that for this case, LRU_REFS_MASK is not used. * * For both cases above, after PG_workingset is set on a folio, it remains until * this folio is either reclaimed, or "deactivated" by lru_gen_clear_refs(). It * can be set again if lru_gen_test_recent() returns true upon a refault. */ #define LRU_REFS_FLAGS (LRU_REFS_MASK | BIT(PG_referenced)) struct lruvec; struct page_vma_mapped_walk; #ifdef CONFIG_LRU_GEN enum { LRU_GEN_ANON, LRU_GEN_FILE, }; enum { LRU_GEN_CORE, LRU_GEN_MM_WALK, LRU_GEN_NONLEAF_YOUNG, NR_LRU_GEN_CAPS }; #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) /* whether to keep historical stats from evicted generations */ #ifdef CONFIG_LRU_GEN_STATS #define NR_HIST_GENS MAX_NR_GENS #else #define NR_HIST_GENS 1U #endif /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are * stored in min_seq[] separately for anon and file types so that they can be * incremented independently. Ideally min_seq[] are kept in sync when both anon * and file types are evictable. However, to adapt to situations like extreme * swappiness, they are allowed to be out of sync by at most * MAX_NR_GENS-MIN_NR_GENS-1. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. */ struct lru_gen_folio { /* the aging increments the youngest generation number */ unsigned long max_seq; /* the eviction increments the oldest generation numbers */ unsigned long min_seq[ANON_AND_FILE]; /* the birth time of each generation in jiffies */ unsigned long timestamps[MAX_NR_GENS]; /* the multi-gen LRU lists, lazily sorted on eviction */ struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; /* the exponential moving average of evicted+protected */ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; /* can only be modified under the LRU lock */ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; /* the memcg generation this lru_gen_folio belongs to */ u8 gen; /* the list segment this lru_gen_folio belongs to */ u8 seg; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_node list; }; enum { MM_LEAF_TOTAL, /* total leaf entries */ MM_LEAF_YOUNG, /* young leaf entries */ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ NR_MM_STATS }; /* double-buffering Bloom filters */ #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; /* where the last iteration ended before */ struct list_head *tail; /* Bloom filters flip after each iteration */ unsigned long *filters[NR_BLOOM_FILTERS]; /* the mm stats for debugging */ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; }; struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; /* max_seq from lru_gen_folio: can be out of date */ unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* to batch the mm stats */ int mm_stats[NR_MM_STATS]; /* total batched items */ int batched; int swappiness; bool force_scan; }; /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins * to improve scalability. For each bin, the hlist_nulls is virtually divided * into three segments: the head, the tail and the default. * * An onlining memcg is added to the tail of a random bin in the old generation. * The eviction starts at the head of a random bin in the old generation. The * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes * the old generation, is incremented when all its bins become empty. * * There are four operations: * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; * 3. The first attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_TAIL; * 4. The second attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * * Notes: * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing * of their max_seq counters ensures the eventual fairness to all eligible * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). * 2. There are only two valid generations: old (seq) and young (seq+1). * MEMCG_NR_GENS is set to three so that when reading the generation counter * locklessly, a stale value (seq-1) does not wraparound to young. */ #define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { /* the per-node memcg generation counter */ unsigned long seq; /* each memcg has one lru_gen_folio per node */ unsigned long nr_memcgs[MEMCG_NR_GENS]; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS]; /* protects the above */ spinlock_t lock; }; void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) { } static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { return false; } static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } #endif /* CONFIG_LRU_GEN */ struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ spinlock_t lru_lock; /* * These track the cost of reclaiming one LRU - file or anon - * over the other. As the observed cost of reclaiming one LRU * increases, the reclaim scan balance tips toward the other. */ unsigned long anon_cost; unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ struct lru_gen_folio lrugen; #ifdef CONFIG_LRU_GEN_WALKS_MMU /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif #endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif struct zswap_lruvec_state zswap_lruvec_state; }; /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8) /* LRU Isolation modes. */ typedef unsigned __bitwise isolate_mode_t; enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, WMARK_PROMO, NR_WMARK }; /* * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_PCP_THP 2 #else #define NR_PCP_THP 0 #endif #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) /* * Flags used in pcp->flags field. * * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the * previous page freeing. To avoid to drain PCP for an accident * high-order page freeing. * * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before * draining PCP for consecutive high-order pages freeing without * allocation if data cache slice of CPU is large enough. To reduce * zone lock contention and keep cache-hot pages reusing. */ #define PCPF_PREV_FREE_HIGH_ORDER BIT(0) #define PCPF_FREE_HIGH_BATCH BIT(1) struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int high_min; /* min high watermark */ int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ u8 flags; /* protected by pcp->lock */ u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA u8 expire; /* When 0, remote pagesets are drained */ #endif short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; } ____cacheline_aligned_in_smp; struct per_cpu_zonestat { #ifdef CONFIG_SMP s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; s8 stat_threshold; #endif #ifdef CONFIG_NUMA /* * Low priority inaccurate counters that are only folded * on demand. Use a large type to avoid the overhead of * folding during refresh_cpu_vm_stats. */ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #endif }; struct per_cpu_nodestat { s8 stat_threshold; s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; }; #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { /* * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able * to DMA to all of the addressable memory (ZONE_NORMAL). * On architectures where this area covers the whole 32 bit address * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller * DMA addressing constraints. This distinction is important as a 32bit * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 ZONE_DMA32, #endif /* * Normal addressable memory is in ZONE_NORMAL. DMA operations can be * performed on pages in ZONE_NORMAL if the DMA devices support * transfers to all addressable memory. */ ZONE_NORMAL, #ifdef CONFIG_HIGHMEM /* * A memory area that is only addressable by the kernel through * mapping portions into its own address space. This is for example * used by i386 to allow the kernel to address the memory beyond * 900MB. The kernel will set up special mappings (page * table entries on i386) for each page that the kernel needs to * access. */ ZONE_HIGHMEM, #endif /* * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains * movable pages with few exceptional cases described below. Main use * cases for ZONE_MOVABLE are to make memory offlining/unplug more * likely to succeed, and to locally limit unmovable allocations - e.g., * to increase the number of THP/huge pages. Notable special cases are: * * 1. Pinned pages: (long-term) pinning of movable pages might * essentially turn such pages unmovable. Therefore, we do not allow * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and * faulted, they come from the right zone right away. However, it is * still possible that address space already has pages in * ZONE_MOVABLE at the time when pages are pinned (i.e. user has * touches that memory before pinning). In such case we migrate them * to a different zone. When migration fails - pinning fails. * 2. memblock allocations: kernelcore/movablecore setups might create * situations where ZONE_MOVABLE contains unmovable allocations * after boot. Memory offlining and allocations fail early. * 3. Memory holes: kernelcore/movablecore setups might create very rare * situations where ZONE_MOVABLE contains memory holes after boot, * for example, if we have sections that are only partially * populated. Memory offlining and allocations fail early. * 4. PG_hwpoison pages: while poisoned pages can be skipped during * memory offlining, such pages cannot be allocated. * 5. Unmovable PG_offline pages: in paravirtualized environments, * hotplugged memory blocks might only partially be managed by the * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The * parts not manged by the buddy are unmovable PG_offline pages. In * some cases (virtio-mem), such pages can be skipped during * memory offlining, however, cannot be moved/allocated. These * techniques might use alloc_contig_range() to hide previously * exposed pages from the buddy again (e.g., to implement some sort * of memory unplug in virtio-mem). * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create * situations where ZERO_PAGE(0) which is allocated differently * on different platforms may end up in a movable zone. ZERO_PAGE(0) * cannot be migrated. * 7. Memory-hotplug: when using memmap_on_memory and onlining the * memory to the MOVABLE zone, the vmemmap pages are also placed in * such zone. Such pages cannot be really moved around as they are * self-stored in the range, but they are treated as movable when * the range they describe is about to be offlined. * * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) * have to expect that migrating pages in ZONE_MOVABLE can fail (even * if has_unmovable_pages() states that there are no unmovable pages, * there can be false negatives). */ ZONE_MOVABLE, #ifdef CONFIG_ZONE_DEVICE ZONE_DEVICE, #endif __MAX_NR_ZONES }; #ifndef __GENERATING_BOUNDS_H #define ASYNC_AND_SYNC 2 struct zone { /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long _watermark[NR_WMARK]; unsigned long watermark_boost; unsigned long nr_reserved_highatomic; unsigned long nr_free_highatomic; /* * We don't know if the memory that we're going to allocate will be * freeable or/and it will be released eventually, so to avoid totally * wasting several GB of ram we must reserve some of the lower zone * memory (otherwise we risk to run OOM on the lower zones despite * there being tons of freeable ram on the higher zones). This array is * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl * changes. */ long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; #endif struct pglist_data *zone_pgdat; struct per_cpu_pages __percpu *per_cpu_pageset; struct per_cpu_zonestat __percpu *per_cpu_zonestats; /* * the high and batch values are copied to individual pagesets for * faster access */ int pageset_high_min; int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; /* * spanned_pages is the total pages spanned by the zone, including * holes, which is calculated as: * spanned_pages = zone_end_pfn - zone_start_pfn; * * present_pages is physical pages existing within the zone, which * is calculated as: * present_pages = spanned_pages - absent_pages(pages in holes); * * present_early_pages is present pages existing within the zone * located on memory available since early boot, excluding hotplugged * memory. * * managed_pages is present pages managed by the buddy system, which * is calculated as (reserved_pages includes pages allocated by the * bootmem allocator): * managed_pages = present_pages - reserved_pages; * * cma pages is present pages that are assigned for CMA use * (MIGRATE_CMA). * * So present_pages may be used by memory hotplug or memory power * management logic to figure out unmanaged pages by checking * (present_pages - managed_pages). And managed_pages should be used * by page allocator and vm scanner to calculate all kinds of watermarks * and thresholds. * * Locking rules: * * zone_start_pfn and spanned_pages are protected by span_seqlock. * It is a seqlock because it has to be read outside of zone->lock, * and it is done in the main allocator path. But, it is written * quite infrequently. * * The span_seq lock is declared along with zone->lock because it is * frequently read in proximity to zone->lock. It's good to * give them a chance of being in the same cacheline. * * Write access to present_pages at runtime should be protected by * mem_hotplug_begin/done(). Any reader who can't tolerant drift of * present_pages should use get_online_mems() to get a stable value. */ atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; #if defined(CONFIG_MEMORY_HOTPLUG) unsigned long present_early_pages; #endif #ifdef CONFIG_CMA unsigned long cma_pages; #endif const char *name; #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect * freepage counting problem due to racy retrieving migratetype * of pageblock. Protected by zone->lock. */ unsigned long nr_isolate_pageblock; #endif #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif int initialized; /* Write-intensive fields used from the page allocator */ CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif /* zone flags, see below */ unsigned long flags; /* Primarily protects free_area */ spinlock_t lock; /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter * drift allowing watermarks to be breached */ unsigned long percpu_drift_mark; #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* pfn where compaction free scanner should start */ unsigned long compact_cached_free_pfn; /* pfn where compaction migration scanner should start */ unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC]; unsigned long compact_init_migrate_pfn; unsigned long compact_init_free_pfn; #endif #ifdef CONFIG_COMPACTION /* * On compaction failure, 1<<compact_defer_shift compactions * are skipped before trying again. The number attempted since * last failure is tracked with compact_considered. * compact_order_failed is the minimum compaction failed order. */ unsigned int compact_considered; unsigned int compact_defer_shift; int compact_order_failed; #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* Set to true when the PG_migrate_skip bits should be cleared */ bool compact_blockskip_flush; #endif bool contiguous; CACHELINE_PADDING(_pad3_); /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. */ PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; enum zone_flags { ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks. * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; static inline unsigned long wmark_pages(const struct zone *z, enum zone_watermarks w) { return z->_watermark[w] + z->watermark_boost; } static inline unsigned long min_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_MIN); } static inline unsigned long low_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_LOW); } static inline unsigned long high_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_HIGH); } static inline unsigned long promo_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_PROMO); } static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } static inline unsigned long zone_cma_pages(struct zone *zone) { #ifdef CONFIG_CMA return zone->cma_pages; #else return 0; #endif } static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) { return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } static inline bool zone_is_initialized(struct zone *zone) { return zone->initialized; } static inline bool zone_is_empty(struct zone *zone) { return zone->spanned_pages == 0; } #ifndef BUILD_VDSO32_64 /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) #define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) #define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures * the compiler will optimise away reference to them. */ #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) #define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF) ? \ SECTIONS_PGOFF : ZONES_PGOFF) #else #define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF) ? \ NODES_PGOFF : ZONES_PGOFF) #endif #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) { ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } static inline enum zone_type folio_zonenum(const struct folio *folio) { return page_zonenum(&folio->page); } #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different * pgmaps. Otherwise getting the pgmap of a given segment is not possible * without scanning the entire segment. This helper returns true either if * both pages are not zone device pages or both pages are zone device pages * with the same pgmap. */ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { if (is_zone_device_page(a) != is_zone_device_page(b)) return false; if (!is_zone_device_page(a)) return true; return a->pgmap == b->pgmap; } extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else static inline bool is_zone_device_page(const struct page *page) { return false; } static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { return true; } #endif static inline bool folio_is_zone_device(const struct folio *folio) { return is_zone_device_page(&folio->page); } static inline bool is_zone_movable_page(const struct page *page) { return page_zonenum(page) == ZONE_MOVABLE; } static inline bool folio_is_zone_movable(const struct folio *folio) { return folio_zonenum(folio) == ZONE_MOVABLE; } #endif /* * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ static inline bool zone_intersects(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) return false; if (start_pfn >= zone_end_pfn(zone) || start_pfn + nr_pages <= zone->zone_start_pfn) return false; return true; } /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the * queues ("queue_length >> 12") during an aging round. */ #define DEF_PRIORITY 12 /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) enum { ZONELIST_FALLBACK, /* zonelist with fallback */ #ifdef CONFIG_NUMA /* * The NUMA zonelists are doubled because we need zonelists that * restrict the allocations to a single node for __GFP_THISNODE. */ ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ #endif MAX_ZONELISTS }; /* * This struct contains information about a zone in a zonelist. It is stored * here to avoid dereferences into large structures and lookups of tables */ struct zoneref { struct zone *zone; /* Pointer to actual zone */ int zone_idx; /* zone_idx(zoneref->zone) */ }; /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are * * zonelist_zone() - Return the struct zone * for an entry in _zonerefs * zonelist_zone_idx() - Return the index of the zone for an entry * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; }; /* * The array of struct pages for flatmem. * It must be declared for SPARSEMEM as well because there are configurations * that rely on that. */ extern struct page *mem_map; #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split { spinlock_t split_queue_lock; struct list_head split_queue; unsigned long split_queue_len; }; #endif #ifdef CONFIG_MEMORY_FAILURE /* * Per NUMA node memory failure handling statistics. */ struct memory_failure_stats { /* * Number of raw pages poisoned. * Cases not accounted: memory outside kernel control, offline page, * arch-specific memory_failure (SGX), hwpoison_filter() filtered * error events, and unpoison actions from hwpoison_unpoison. */ unsigned long total; /* * Recovery results of poisoned raw pages handled by memory_failure, * in sync with mf_result. * total = ignored + failed + delayed + recovered. * total * PAGE_SIZE * #nodes = /proc/meminfo/HardwareCorrupted. */ unsigned long ignored; unsigned long failed; unsigned long delayed; unsigned long recovered; }; #endif /* * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. On UMA machines there is a single pglist_data which * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ typedef struct pglist_data { /* * node_zones contains just the zones for THIS node. Not all of the * zones may be populated, but it is the full list. It is referenced by * this node's node_zonelists as well as other node's node_zonelists. */ struct zone node_zones[MAX_NR_ZONES]; /* * node_zonelists contains references to all zones in all nodes. * Generally the first zones will be references to this node's * node_zones. */ struct zonelist node_zonelists[MAX_ZONELISTS]; int nr_zones; /* number of populated zones in this node */ #ifdef CONFIG_FLATMEM /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION struct page_ext *node_page_ext; #endif #endif #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, * node_present_pages, node_spanned_pages or nr_zones to stay constant. * Also synchronizes pgdat->first_deferred_pfn during deferred page * init. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG * or CONFIG_DEFERRED_STRUCT_PAGE_INIT. * * Nests above zone->lock and zone->span_seqlock */ spinlock_t node_size_lock; #endif unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; /* workqueues for throttling reclaim for different reasons. */ wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE]; atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ #ifdef CONFIG_MEMORY_HOTPLUG struct mutex kswapd_lock; #endif struct task_struct *kswapd; /* Protected by kswapd_lock */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; int kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available * to userspace allocations. */ unsigned long totalreserve_pages; #ifdef CONFIG_NUMA /* * node reclaim becomes active if more unmapped pages exist. */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif /* CONFIG_NUMA */ /* Write-intensive fields used by page reclaim */ CACHELINE_PADDING(_pad1_); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* * If memory initialisation on large machines is deferred then this * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split deferred_split_queue; #endif #ifdef CONFIG_NUMA_BALANCING /* start time in ms of current promote rate limit period */ unsigned int nbp_rl_start; /* number of promote candidate pages at start time of current rate limit period */ unsigned long nbp_rl_nr_cand; /* promote threshold in ms */ unsigned int nbp_threshold; /* start time in ms of current promote threshold adjustment period */ unsigned int nbp_th_start; /* * number of promote candidate pages at start time of current promote * threshold adjustment period */ unsigned long nbp_th_nr_cand; #endif /* Fields commonly accessed by the page reclaim scanner */ /* * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED. * * Use mem_cgroup_lruvec() to look up lruvecs. */ struct lruvec __lruvec; unsigned long flags; #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ struct lru_gen_mm_walk mm_walk; /* lru_gen_folio list */ struct lru_gen_memcg memcg_lru; #endif CACHELINE_PADDING(_pad2_); /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; #ifdef CONFIG_NUMA struct memory_tier __rcu *memtier; #endif #ifdef CONFIG_MEMORY_FAILURE struct memory_failure_stats mf_stats; #endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; } #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, enum zone_type highest_zoneidx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags, long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. */ enum meminit_context { MEMINIT_EARLY, MEMINIT_HOTPLUG, }; extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { #ifdef CONFIG_MEMCG return lruvec->pgdat; #else return container_of(lruvec, struct pglist_data, __lruvec); #endif } #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else static inline int local_memory_node(int node_id) { return node_id; }; #endif /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE static inline bool zone_is_zone_device(struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else static inline bool zone_is_zone_device(struct zone *zone) { return false; } #endif /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). */ static inline bool managed_zone(struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ static inline bool populated_zone(struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA static inline int zone_to_nid(struct zone *zone) { return zone->node; } static inline void zone_set_nid(struct zone *zone, int nid) { zone->node = nid; } #else static inline int zone_to_nid(struct zone *zone) { return 0; } static inline void zone_set_nid(struct zone *zone, int nid) {} #endif extern int movable_zone; static inline int is_highmem_idx(enum zone_type idx) { #ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM || (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM)); #else return 0; #endif } /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ static inline int is_highmem(struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } #ifdef CONFIG_ZONE_DMA bool has_managed_dma(void); #else static inline bool has_managed_dma(void) { return false; } #endif #ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } #else /* CONFIG_NUMA */ #include <asm/mmzone.h> #endif /* !CONFIG_NUMA */ extern struct pglist_data *first_online_pgdat(void); extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); extern struct zone *next_zone(struct zone *zone); /** * for_each_online_pgdat - helper macro to iterate over all online nodes * @pgdat: pointer to a pg_data_t variable */ #define for_each_online_pgdat(pgdat) \ for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) /** * for_each_zone - helper macro to iterate over all memory zones * @zone: pointer to struct zone variable * * The user only needs to declare the zone variable, for_each_zone * fills it in. */ #define for_each_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) #define for_each_populated_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) \ if (!populated_zone(zone)) \ ; /* do nothing */ \ else static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; } static inline int zonelist_zone_idx(struct zoneref *zoneref) { return zoneref->zone_idx; } static inline int zonelist_node_idx(struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } struct zoneref *__next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes); /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z: The cursor used as a starting point for the search * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. * * Return: the next zone at or below highest_zoneidx within the allowed * nodemask using a cursor within a zonelist as a starting point */ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes) { if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx)) return z; return __next_zones_zonelist(z, highest_zoneidx, nodes); } /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist * @zonelist: The zonelist to search for a suitable zone * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. * * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is * never NULL). This may happen either genuinely, or due to concurrent nodemask * update due to cpuset modification. * * Return: Zoneref pointer for the first suitable zone found */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, nodemask_t *nodes) { return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); } /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone: The current zone in the iterator * @z: The current pointer within zonelist->_zonerefs being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * @nodemask: Nodemask allowed by the allocator * * This iterator iterates though all zones at or below a given zone index and * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ for (zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index * @zone: The current zone in the iterator * @z: The current pointer within zonelist->zones being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * * This iterator iterates though all zones at or below a given zone index. */ #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) /* Whether the 'nodes' are all movable nodes */ static inline bool movable_only_nodes(nodemask_t *nodes) { struct zonelist *zonelist; struct zoneref *z; int nid; if (nodes_empty(*nodes)) return false; /* * We can chose arbitrary node from the nodemask to get a * zonelist as they are interlinked. We just need to find * at least one zone that can satisfy kernel allocations. */ nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); return (!zonelist_zone(z)) ? true : false; } #ifdef CONFIG_SPARSEMEM #include <asm/sparsemem.h> #endif #ifdef CONFIG_FLATMEM #define pfn_to_nid(pfn) (0) #endif #ifdef CONFIG_SPARSEMEM /* * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) #if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) { return pfn >> PFN_SECTION_SHIFT; } static inline unsigned long section_nr_to_pfn(unsigned long sec) { return sec << PFN_SECTION_SHIFT; } #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) #define SUBSECTION_SHIFT 21 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) #endif #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ unsigned long pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); struct page; struct page_ext; struct mem_section { /* * This is, logically, a pointer to an array of struct * pages. However, it is stored with some other magic. * (see sparse.c::sparse_init_one_section()) * * Additionally during early boot we encode node id of * the location of the section here to guide allocation. * (see sparse.c::memory_present()) * * Making it a UL at least makes someone do a cast * before using it wrong. */ unsigned long section_mem_map; struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use * section. (see page_ext.h about this.) */ struct page_ext *page_ext; unsigned long pad; #endif /* * WARNING: mem_section must be a power-of-2 in size for the * calculation and use of SECTION_ROOT_MASK to make sense. */ }; #ifdef CONFIG_SPARSEMEM_EXTREME #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) #else #define SECTIONS_PER_ROOT 1 #endif #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif static inline unsigned long *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } static inline struct mem_section *__nr_to_section(unsigned long nr) { unsigned long root = SECTION_NR_TO_ROOT(nr); if (unlikely(root >= NR_SECTION_ROOTS)) return NULL; #ifdef CONFIG_SPARSEMEM_EXTREME if (!mem_section || !mem_section[root]) return NULL; #endif return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store * a little bit of information. The pointer is calculated * as mem_map - section_nr_to_pfn(pnum). The result is * aligned to the minimum alignment of the two values: * 1. All mem_map arrays are page-aligned. * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT * lowest bits. PFN_SECTION_SHIFT is arch-specific * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the * worst combination is powerpc with 256k pages, * which results in PFN_SECTION_SHIFT equal 6. * To sum it up, at least 6 bits are available on all architectures. * However, we can exceed 6 bits on some other architectures except * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available * with the worst case of 64K pages on arm64) if we make sure the * exceeded bit is not applicable to powerpc. */ enum { SECTION_MARKED_PRESENT_BIT, SECTION_HAS_MEM_MAP_BIT, SECTION_IS_ONLINE_BIT, SECTION_IS_EARLY_BIT, #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif SECTION_MAP_LAST_BIT, }; #define SECTION_MARKED_PRESENT BIT(SECTION_MARKED_PRESENT_BIT) #define SECTION_HAS_MEM_MAP BIT(SECTION_HAS_MEM_MAP_BIT) #define SECTION_IS_ONLINE BIT(SECTION_IS_ONLINE_BIT) #define SECTION_IS_EARLY BIT(SECTION_IS_EARLY_BIT) #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT static inline struct page *__section_mem_map_addr(struct mem_section *section) { unsigned long map = section->section_mem_map; map &= SECTION_MAP_MASK; return (struct page *)map; } static inline int present_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int present_section_nr(unsigned long nr) { return present_section(__nr_to_section(nr)); } static inline int valid_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int early_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); } static inline int valid_section_nr(unsigned long nr) { return valid_section(__nr_to_section(nr)); } static inline int online_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE static inline int online_device_section(struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else static inline int online_device_section(struct mem_section *section) { return 0; } #endif static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); } #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); } extern unsigned long __highest_present_section_nr; static inline int subsection_map_index(unsigned long pfn) { return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; } #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); struct mem_section_usage *usage = READ_ONCE(ms->usage); return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } #endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN * @pfn: the page frame number to check * * Check if there is a valid memory map entry aka struct page for the @pfn. * Note, that availability of the memory map entry does not imply that * there is actual usable memory at that @pfn. The struct page may * represent a hole or an unusable page frame. * * Return: 1 for PFNs that have memory map entries and 0 otherwise */ static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the * pfn. Else it might lead to false positives when * some of the upper bits are set, but the lower bits * match a valid pfn. */ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) return 0; if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); rcu_read_lock_sched(); if (!valid_section(ms)) { rcu_read_unlock_sched(); return 0; } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); rcu_read_unlock_sched(); return ret; } #endif static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; return present_section(__pfn_to_section(pfn)); } static inline unsigned long next_present_section_nr(unsigned long section_nr) { while (++section_nr <= __highest_present_section_nr) { if (present_section_nr(section_nr)) return section_nr; } return -1; } /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate * this restriction. */ #ifdef CONFIG_NUMA #define pfn_to_nid(pfn) \ ({ \ unsigned long __pfn_to_nid_pfn = (pfn); \ page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \ }) #else #define pfn_to_nid(pfn) (0) #endif void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */
1 1 1 23 9 2 7 25 25 25 1 1 1 1 1 1 1 1 1 1 1 1 23 1 23 1 1 1 1 1 1 1 1 1 1 1 1 23 23 23 23 23 23 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 24 24 24 24 29 30 29 29 1 1 1 1 1 1 1 1 1 25 25 25 25 25 25 25 25 2 25 25 25 25 25 25 25 25 2 25 25 25 25 25 23 23 25 25 25 25 9 9 9 25 25 25 9 9 9 2 9 9 7 7 9 9 25 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 23 23 23 23 23 23 23 1 23 23 1 23 23 1 23 23 23 23 23 23 23 23 23 22 23 23 23 23 23 23 23 23 23 1 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 1 23 23 23 23 23 23 23 23 23 23 23 23 23 23 5 23 23 23 23 23 23 23 23 23 23 1 23 23 23 23 23 23 1 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "bkey_methods.h" #include "bkey_sort.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" #include "btree_locking.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" #include "checksum.h" #include "debug.h" #include "error.h" #include "extents.h" #include "io_write.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" #include "recovery.h" #include "super-io.h" #include "trace.h" #include <linux/sched/mm.h> static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) { bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn)); prt_str(out, "min: "); bch2_bpos_to_text(out, bn->min_key); prt_newline(out); prt_str(out, "max: "); bch2_bpos_to_text(out, bn->max_key); } void bch2_btree_node_io_unlock(struct btree *b) { EBUG_ON(!btree_node_write_in_flight(b)); clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } void bch2_btree_node_io_lock(struct btree *b) { wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } void __bch2_btree_node_wait_on_read(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, TASK_UNINTERRUPTIBLE); } void __bch2_btree_node_wait_on_write(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } void bch2_btree_node_wait_on_read(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, TASK_UNINTERRUPTIBLE); } void bch2_btree_node_wait_on_write(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } static void verify_no_dups(struct btree *b, struct bkey_packed *start, struct bkey_packed *end) { #ifdef CONFIG_BCACHEFS_DEBUG struct bkey_packed *k, *p; if (start == end) return; for (p = start, k = bkey_p_next(start); k != end; p = k, k = bkey_p_next(k)) { struct bkey l = bkey_unpack_key(b, p); struct bkey r = bkey_unpack_key(b, k); BUG_ON(bpos_ge(l.p, bkey_start_pos(&r))); } #endif } static void set_needs_whiteout(struct bset *i, int v) { struct bkey_packed *k; for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) k->needs_whiteout = v; } static void btree_bounce_free(struct bch_fs *c, size_t size, bool used_mempool, void *p) { if (used_mempool) mempool_free(p, &c->btree_bounce_pool); else kvfree(p); } static void *btree_bounce_alloc(struct bch_fs *c, size_t size, bool *used_mempool) { unsigned flags = memalloc_nofs_save(); void *p; BUG_ON(size > c->opts.btree_node_size); *used_mempool = false; p = kvmalloc(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); } memalloc_nofs_restore(flags); return p; } static void sort_bkey_ptrs(const struct btree *bt, struct bkey_packed **ptrs, unsigned nr) { unsigned n = nr, a = nr / 2, b, c, d; if (!a) return; /* Heap sort: see lib/sort.c: */ while (1) { if (a) a--; else if (--n) swap(ptrs[0], ptrs[n]); else break; for (b = a; c = 2 * b + 1, (d = c + 1) < n;) b = bch2_bkey_cmp_packed(bt, ptrs[c], ptrs[d]) >= 0 ? c : d; if (d == n) b = c; while (b != a && bch2_bkey_cmp_packed(bt, ptrs[a], ptrs[b]) >= 0) b = (b - 1) / 2; c = b; while (b != a) { b = (b - 1) / 2; swap(ptrs[b], ptrs[c]); } } } static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) { struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; bool used_mempool = false; size_t bytes = b->whiteout_u64s * sizeof(u64); if (!b->whiteout_u64s) return; new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); ptrs = ptrs_end = ((void *) new_whiteouts + bytes); for (k = unwritten_whiteouts_start(b); k != unwritten_whiteouts_end(b); k = bkey_p_next(k)) *--ptrs = k; sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); k = new_whiteouts; while (ptrs != ptrs_end) { bkey_p_copy(k, *ptrs); k = bkey_p_next(k); ptrs++; } verify_no_dups(b, new_whiteouts, (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); memcpy_u64s(unwritten_whiteouts_start(b), new_whiteouts, b->whiteout_u64s); btree_bounce_free(c, bytes, used_mempool, new_whiteouts); } static bool should_compact_bset(struct btree *b, struct bset_tree *t, bool compacting, enum compact_mode mode) { if (!bset_dead_u64s(b, t)) return false; switch (mode) { case COMPACT_LAZY: return should_compact_bset_lazy(b, t) || (compacting && !bset_written(b, bset(b, t))); case COMPACT_ALL: return true; default: BUG(); } } static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) { bool ret = false; for_each_bset(b, t) { struct bset *i = bset(b, t); struct bkey_packed *k, *n, *out, *start, *end; struct btree_node_entry *src = NULL, *dst = NULL; if (t != b->set && !bset_written(b, i)) { src = container_of(i, struct btree_node_entry, keys); dst = max(write_block(b), (void *) btree_bkey_last(b, t - 1)); } if (src != dst) ret = true; if (!should_compact_bset(b, t, ret, mode)) { if (src != dst) { memmove(dst, src, sizeof(*src) + le16_to_cpu(src->keys.u64s) * sizeof(u64)); i = &dst->keys; set_btree_bset(b, t, i); } continue; } start = btree_bkey_first(b, t); end = btree_bkey_last(b, t); if (src != dst) { memmove(dst, src, sizeof(*src)); i = &dst->keys; set_btree_bset(b, t, i); } out = i->start; for (k = start; k != end; k = n) { n = bkey_p_next(k); if (!bkey_deleted(k)) { bkey_p_copy(out, k); out = bkey_p_next(out); } else { BUG_ON(k->needs_whiteout); } } i->u64s = cpu_to_le16((u64 *) out - i->_data); set_btree_bset_end(b, t); bch2_bset_set_no_aux_tree(b, t); ret = true; } bch2_verify_btree_nr_keys(b); bch2_btree_build_aux_trees(b); return ret; } bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, enum compact_mode mode) { return bch2_drop_whiteouts(b, mode); } static void btree_node_sort(struct bch_fs *c, struct btree *b, unsigned start_idx, unsigned end_idx) { struct btree_node *out; struct sort_iter_stack sort_iter; struct bset_tree *t; struct bset *start_bset = bset(b, &b->set[start_idx]); bool used_mempool = false; u64 start_time, seq = 0; unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; bool sorting_entire_node = start_idx == 0 && end_idx == b->nsets; sort_iter_stack_init(&sort_iter, b); for (t = b->set + start_idx; t < b->set + end_idx; t++) { u64s += le16_to_cpu(bset(b, t)->u64s); sort_iter_add(&sort_iter.iter, btree_bkey_first(b, t), btree_bkey_last(b, t)); } bytes = sorting_entire_node ? btree_buf_bytes(b) : __vstruct_bytes(struct btree_node, u64s); out = btree_bounce_alloc(c, bytes, &used_mempool); start_time = local_clock(); u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter); out->keys.u64s = cpu_to_le16(u64s); BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); if (sorting_entire_node) bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], start_time); /* Make sure we preserve bset journal_seq: */ for (t = b->set + start_idx; t < b->set + end_idx; t++) seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); start_bset->journal_seq = cpu_to_le64(seq); if (sorting_entire_node) { u64s = le16_to_cpu(out->keys.u64s); BUG_ON(bytes != btree_buf_bytes(b)); /* * Our temporary buffer is the same size as the btree node's * buffer, we can just swap buffers instead of doing a big * memcpy() */ *out = *b->data; out->keys.u64s = cpu_to_le16(u64s); swap(out, b->data); set_btree_bset(b, b->set, &b->data->keys); } else { start_bset->u64s = out->keys.u64s; memcpy_u64s(start_bset->start, out->keys.start, le16_to_cpu(out->keys.u64s)); } for (i = start_idx + 1; i < end_idx; i++) b->nr.bset_u64s[start_idx] += b->nr.bset_u64s[i]; b->nsets -= shift; for (i = start_idx + 1; i < b->nsets; i++) { b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; b->set[i] = b->set[i + shift]; } for (i = b->nsets; i < MAX_BSETS; i++) b->nr.bset_u64s[i] = 0; set_btree_bset_end(b, &b->set[start_idx]); bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); btree_bounce_free(c, bytes, used_mempool, out); bch2_verify_btree_nr_keys(b); } void bch2_btree_sort_into(struct bch_fs *c, struct btree *dst, struct btree *src) { struct btree_nr_keys nr; struct btree_node_iter src_iter; u64 start_time = local_clock(); BUG_ON(dst->nsets != 1); bch2_bset_set_no_aux_tree(dst, dst->set); bch2_btree_node_iter_init_from_start(&src_iter, src); nr = bch2_sort_repack(btree_bset_first(dst), src, &src_iter, &dst->format, true); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], start_time); set_btree_bset_end(dst, dst->set); dst->nr.live_u64s += nr.live_u64s; dst->nr.bset_u64s[0] += nr.bset_u64s[0]; dst->nr.packed_keys += nr.packed_keys; dst->nr.unpacked_keys += nr.unpacked_keys; bch2_verify_btree_nr_keys(dst); } /* * We're about to add another bset to the btree node, so if there's currently * too many bsets - sort some of them together: */ static bool btree_node_compact(struct bch_fs *c, struct btree *b) { unsigned unwritten_idx; bool ret = false; for (unwritten_idx = 0; unwritten_idx < b->nsets; unwritten_idx++) if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) break; if (b->nsets - unwritten_idx > 1) { btree_node_sort(c, b, unwritten_idx, b->nsets); ret = true; } if (unwritten_idx > 1) { btree_node_sort(c, b, 0, unwritten_idx); ret = true; } return ret; } void bch2_btree_build_aux_trees(struct btree *b) { for_each_bset(b, t) bch2_bset_build_aux_tree(b, t, !bset_written(b, bset(b, t)) && t == bset_tree_last(b)); } /* * If we have MAX_BSETS (3) bsets, should we sort them all down to just one? * * The first bset is going to be of similar order to the size of the node, the * last bset is bounded by btree_write_set_buffer(), which is set to keep the * memmove on insert from being too expensive: the middle bset should, ideally, * be the geometric mean of the first and the last. * * Returns true if the middle bset is greater than that geometric mean: */ static inline bool should_compact_all(struct bch_fs *c, struct btree *b) { unsigned mid_u64s_bits = (ilog2(btree_max_u64s(c)) + BTREE_WRITE_SET_U64s_BITS) / 2; return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits; } /* * @bch_btree_init_next - initialize a new (unwritten) bset that can then be * inserted into * * Safe to call if there already is an unwritten bset - will only add a new bset * if @b doesn't already have one. * * Returns true if we sorted (i.e. invalidated iterators */ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; struct btree_node_entry *bne; bool reinit_iter = false; EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]); BUG_ON(bset_written(b, bset(b, &b->set[1]))); BUG_ON(btree_node_just_written(b)); if (b->nsets == MAX_BSETS && !btree_node_write_in_flight(b) && should_compact_all(c, b)) { bch2_btree_node_write_trans(trans, b, SIX_LOCK_write, BTREE_WRITE_init_next_bset); reinit_iter = true; } if (b->nsets == MAX_BSETS && btree_node_compact(c, b)) reinit_iter = true; BUG_ON(b->nsets >= MAX_BSETS); bne = want_new_bset(c, b); if (bne) bch2_bset_init_next(b, bne); bch2_btree_build_aux_trees(b); if (reinit_iter) bch2_trans_node_reinit_iter(trans, b); } static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, struct bkey_packed *k, unsigned offset, int write) { prt_printf(out, bch2_log_msg(c, "%s"), write == READ ? "error validating btree node " : "corrupt btree node before write "); if (ca) prt_printf(out, "on %s ", ca->name); prt_printf(out, "at btree "); bch2_btree_pos_to_text(out, c, b); printbuf_indent_add(out, 2); prt_printf(out, "\nnode offset %u/%u", b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); if (k) prt_printf(out, " bset byte offset %lu", (unsigned long)(void *)k - ((unsigned long)(void *)i & ~511UL)); prt_str(out, ": "); } __printf(10, 11) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, struct bkey_packed *k, int write, bool have_retry, enum bch_sb_error_id err_type, const char *fmt, ...) { struct printbuf out = PRINTBUF; bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; va_list args; btree_err_msg(&out, c, ca, b, i, k, b->written, write); va_start(args, fmt); prt_vprintf(&out, fmt, args); va_end(args); if (write == WRITE) { bch2_print_string_as_lines(KERN_ERR, out.buf); ret = c->opts.errors == BCH_ON_ERROR_continue ? 0 : -BCH_ERR_fsck_errors_not_fixed; goto out; } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = -BCH_ERR_btree_node_read_err_fixable; if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) bch2_sb_error_count(c, err_type); switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret = !silent ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf) : -BCH_ERR_fsck_fix; if (ret != -BCH_ERR_fsck_fix && ret != -BCH_ERR_fsck_ignore) goto fsck_err; ret = -BCH_ERR_fsck_fix; break; case -BCH_ERR_btree_node_read_err_want_retry: case -BCH_ERR_btree_node_read_err_must_retry: if (!silent) bch2_print_string_as_lines(KERN_ERR, out.buf); break; case -BCH_ERR_btree_node_read_err_bad_node: if (!silent) bch2_print_string_as_lines(KERN_ERR, out.buf); ret = bch2_topology_error(c); break; case -BCH_ERR_btree_node_read_err_incompatible: if (!silent) bch2_print_string_as_lines(KERN_ERR, out.buf); ret = -BCH_ERR_fsck_errors_not_fixed; break; default: BUG(); } out: fsck_err: printbuf_exit(&out); return ret; } #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \ BCH_FSCK_ERR_##_err_type, \ msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ ret = _ret; \ goto fsck_err; \ } \ \ *saw_error = true; \ }) #define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) /* * When btree topology repair changes the start or end of a node, that might * mean we have to drop keys that are no longer inside the node: */ __cold void bch2_btree_node_drop_keys_outside_node(struct btree *b) { for_each_bset(b, t) { struct bset *i = bset(b, t); struct bkey_packed *k; for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0) break; if (k != i->start) { unsigned shift = (u64 *) k - (u64 *) i->start; memmove_u64s_down(i->start, k, (u64 *) vstruct_end(i) - (u64 *) k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); set_btree_bset_end(b, t); } for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0) break; if (k != vstruct_last(i)) { i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); set_btree_bset_end(b, t); } } /* * Always rebuild search trees: eytzinger search tree nodes directly * depend on the values of min/max key: */ bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); b->nr = bch2_btree_node_count_keys(b); struct bkey_s_c k; struct bkey unpacked; struct btree_node_iter iter; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); } } static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, unsigned offset, unsigned sectors, int write, bool have_retry, bool *saw_error) { unsigned version = le16_to_cpu(i->version); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; int ret = 0; btree_err_on(!bch2_version_compatible(version), -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, NULL, btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); if (btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { mutex_lock(&c->sb_lock); c->disk_sb.sb->version_min = cpu_to_le16(version); bch2_write_super(c); mutex_unlock(&c->sb_lock); } if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { mutex_lock(&c->sb_lock); c->disk_sb.sb->version = cpu_to_le16(version); bch2_write_super(c); mutex_unlock(&c->sb_lock); } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, NULL, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); if (!write && btree_err_on(offset + sectors > (ptr_written ?: btree_sectors(c)), -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_past_end_of_btree_node, "bset past end of btree node (offset %u len %u but written %zu)", offset, sectors, ptr_written ?: btree_sectors(c))) i->u64s = 0; btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_empty, "empty bset"); btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_wrong_sector_offset, "bset at wrong sector offset"); if (!offset) { struct btree_node *bn = container_of(i, struct btree_node, keys); /* These indicate that we read the wrong btree node: */ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; /* XXX endianness */ btree_err_on(bp->seq != bn->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_level, "incorrect level"); if (!write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; if (BTREE_PTR_RANGE_UPDATED(bp)) { b->data->min_key = bp->min_key; b->data->max_key = b->key.k.p; } btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), (printbuf_reset(&buf2), bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); } btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_max_key, "incorrect max key %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); if (write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, NULL, btree_node_bad_format, "invalid bkey format: %s\n %s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); printbuf_reset(&buf1); compat_bformat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &bn->format); } fsck_err: printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; } static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b, struct bkey_s_c k, enum bch_validate_flags flags) { return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = flags }); } static int bset_key_validate(struct bch_fs *c, struct btree *b, struct bkey_s_c k, bool updated_range, enum bch_validate_flags flags) { struct bkey_validate_context from = (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = flags, }; return __bch2_bkey_validate(c, k, from) ?: (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?: (flags & BCH_VALIDATE_write ? btree_node_bkey_val_validate(c, b, k, flags) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, struct bset *i, struct bkey_packed *k) { if (bkey_p_next(k) > vstruct_last(i)) return false; if (k->format > KEY_FORMAT_CURRENT) return false; if (!bkeyp_u64s_valid(&b->format, k)) return false; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); return !__bch2_bkey_validate(c, u.s_c, (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = BCH_VALIDATE_silent }); } static inline int btree_node_read_bkey_cmp(const struct btree *b, const struct bkey_packed *l, const struct bkey_packed *r) { return bch2_bkey_cmp_packed(b, l, r) ?: (int) bkey_deleted(r) - (int) bkey_deleted(l); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, bool have_retry, bool *saw_error) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; struct printbuf buf = PRINTBUF; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; for (k = i->start; k != vstruct_last(i);) { struct bkey_s u; struct bkey tmp; unsigned next_good_key; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) goto drop_this_key; if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_bad_u64s, "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; if (!write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &b->format, k); u = __bkey_disassemble(b, k, &tmp); ret = bset_key_validate(c, b, u.s_c, updated_range, write); if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; if (ret) goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &b->format, k); if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) { struct bkey up = bkey_unpack_key(b, prev); printbuf_reset(&buf); prt_printf(&buf, "keys out of order: "); bch2_bkey_to_text(&buf, &up); prt_printf(&buf, " > "); bch2_bkey_to_text(&buf, u.k); if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_out_of_order, "%s", buf.buf)) goto drop_this_key; } prev = k; k = bkey_p_next(k); continue; drop_this_key: next_good_key = k->u64s; if (!next_good_key || (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN && version >= bcachefs_metadata_version_snapshot)) { /* * only do scanning if bch2_bkey_compat() has nothing to * do */ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { for (next_good_key = 1; next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; next_good_key++) if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) goto got_good_key; } /* * didn't find a good key, have to truncate the rest of * the bset */ next_good_key = (u64 *) vstruct_last(i) - (u64 *) k; } got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); } fsck_err: printbuf_exit(&buf); return ret; } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct btree *b, bool have_retry, bool *saw_error) { struct btree_node_entry *bne; struct sort_iter *iter; struct btree_node *sorted; struct bkey_packed *k; struct bset *i; bool used_mempool, blacklisted; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; u64 start_time = local_clock(); b->version_ondisk = U16_MAX; /* We might get called multiple times on read retry: */ b->written = 0; iter = mempool_alloc(&c->fill_iter, GFP_NOFS); sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); if (bch2_meta_read_fault("btree")) btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; bch2_bpos_to_text(&buf, b->data->min_key); prt_str(&buf, "-"); bch2_bpos_to_text(&buf, b->data->max_key); btree_err_on(b->data->keys.seq != bp->seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_seq, "got wrong btree node: got\n%s", (printbuf_reset(&buf), bch2_btree_node_header_to_text(&buf, b->data), buf.buf)); } else { btree_err_on(!b->data->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_seq, "bad btree header: seq 0\n%s", (printbuf_reset(&buf), bch2_btree_node_header_to_text(&buf, b->data), buf.buf)); } while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors; bool first = !b->written; if (first) { bne = NULL; i = &b->data->keys; } else { bne = write_block(b); i = &bne->keys; if (i->seq != b->data->keys.seq) break; } struct nonce nonce = btree_nonce(i, b->written << 9); bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); btree_err_on(!good_csum_type, bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) ? -BCH_ERR_btree_node_read_err_must_retry : -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_unknown_csum, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); if (first) { if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); bool csum_bad = bch2_crc_cmp(b->data->csum, csum); if (csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), -BCH_ERR_btree_node_read_err_incompatible, c, NULL, b, NULL, NULL, btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); sectors = vstruct_sectors(b->data, c->block_bits); } else { if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); bool csum_bad = bch2_crc_cmp(bne->csum, csum); if (ca && csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } sectors = vstruct_sectors(bne, c->block_bits); } b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); ret = validate_bset(c, ca, b, i, b->written, sectors, READ, have_retry, saw_error); if (ret) goto fsck_err; if (!b->written) btree_node_set_format(b, b->data->format); ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error); if (ret) goto fsck_err; SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); blacklisted = bch2_journal_seq_is_blacklisted(c, le64_to_cpu(i->journal_seq), true); btree_err_on(blacklisted && first, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); b->written += sectors; if (blacklisted && !first) continue; sort_iter_add(iter, vstruct_idx(i, 0), vstruct_last(i)); max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq)); } if (ptr_written) { btree_err_on(b->written < ptr_written, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, NULL, btree_node_data_missing, "btree node data missing: expected %u sectors, found %u", ptr_written, b->written); } else { for (bne = write_block(b); bset_byte_offset(b, bne) < btree_buf_bytes(b); bne = (void *) bne + block_bytes(c)) btree_err_on(bne->keys.seq == b->data->keys.seq && !bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), true), -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset"); } sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); sorted->keys.u64s = 0; set_btree_bset(b, b->set, &b->data->keys); b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter); memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0, btree_buf_bytes(b) - sizeof(struct btree_node) - b->nr.live_u64s * sizeof(u64)); u64s = le16_to_cpu(sorted->keys.u64s); *sorted = *b->data; sorted->keys.u64s = cpu_to_le16(u64s); swap(sorted, b->data); set_btree_bset(b, b->set, &b->data->keys); b->nsets = 1; b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); BUG_ON(b->nr.live_u64s != u64s); btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); if (updated_range) bch2_btree_node_drop_keys_outside_node(b); i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); ret = btree_node_bkey_val_validate(c, b, u.s_c, READ); if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->bversion, MAX_VERSION))) { btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); set_btree_node_need_rewrite(b); continue; } if (ret) goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); bp.v->mem_ptr = 0; } k = bkey_p_next(k); } bch2_bset_build_aux_tree(b, b->set, false); set_needs_whiteout(btree_bset_first(b), true); btree_node_reset_sib_u64s(b); rcu_read_lock(); bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) set_btree_node_need_rewrite(b); } rcu_read_unlock(); if (!ptr_written) set_btree_node_need_rewrite(b); out: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return retry_read; fsck_err: if (ret == -BCH_ERR_btree_node_read_err_want_retry || ret == -BCH_ERR_btree_node_read_err_must_retry) { retry_read = 1; } else { set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); } goto out; } static void btree_node_read_work(struct work_struct *work) { struct btree_read_bio *rb = container_of(work, struct btree_read_bio, work); struct bch_fs *c = rb->c; struct bch_dev *ca = rb->have_ioref ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; struct printbuf buf = PRINTBUF; bool saw_error = false; bool retry = false; bool can_retry; goto start; while (1) { retry = true; bch_info(c, "retrying read"); ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ); rb->have_ioref = ca != NULL; bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = rb->pick.ptr.offset; bio->bi_iter.bi_size = btree_buf_bytes(b); if (rb->have_ioref) { bio_set_dev(bio, ca->disk_sb.bdev); submit_bio_wait(bio); } else { bio->bi_status = BLK_STS_REMOVED; } start: printbuf_reset(&buf); bch2_btree_pos_to_text(&buf, c, b); bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read, "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); rb->have_ioref = false; bch2_mark_io_failure(&failed, &rb->pick); can_retry = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick) > 0; if (!bio->bi_status && !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { if (retry) bch_info(c, "retry success"); break; } saw_error = true; if (!can_retry) { set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); break; } } bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); if ((saw_error || btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { if (saw_error) { printbuf_reset(&buf); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", __func__, buf.buf); } bch2_btree_node_rewrite_async(c, b); } printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } static void btree_node_read_endio(struct bio *bio) { struct btree_read_bio *rb = container_of(bio, struct btree_read_bio, bio); struct bch_fs *c = rb->c; if (rb->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); } queue_work(c->btree_read_complete_wq, &rb->work); } struct btree_node_read_all { struct closure cl; struct bch_fs *c; struct btree *b; unsigned nr; void *buf[BCH_REPLICAS_MAX]; struct bio *bio[BCH_REPLICAS_MAX]; blk_status_t err[BCH_REPLICAS_MAX]; }; static unsigned btree_node_sectors_written(struct bch_fs *c, void *data) { struct btree_node *bn = data; struct btree_node_entry *bne; unsigned offset = 0; if (le64_to_cpu(bn->magic) != bset_magic(c)) return 0; while (offset < btree_sectors(c)) { if (!offset) { offset += vstruct_sectors(bn, c->block_bits); } else { bne = data + (offset << 9); if (bne->keys.seq != bn->keys.seq) break; offset += vstruct_sectors(bne, c->block_bits); } } return offset; } static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data) { struct btree_node *bn = data; struct btree_node_entry *bne; if (!offset) return false; while (offset < btree_sectors(c)) { bne = data + (offset << 9); if (bne->keys.seq == bn->keys.seq) return true; offset++; } return false; return offset; } static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) { closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; bool have_retry = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; bool _saw_error = false, *saw_error = &_saw_error; for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; if (ra->err[i]) continue; if (le64_to_cpu(bn->magic) != bset_magic(c) || (seq && seq != bn->keys.seq)) continue; if (best < 0) { best = i; written = btree_node_sectors_written(c, bn); continue; } written2 = btree_node_sectors_written(c, ra->buf[i]); if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; if (written2 > written) { written = written2; best = i; } } fsck_err: if (dump_bset_maps) { for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; struct btree_node_entry *bne = NULL; unsigned offset = 0, sectors; bool gap = false; if (ra->err[i]) continue; printbuf_reset(&buf); while (offset < btree_sectors(c)) { if (!offset) { sectors = vstruct_sectors(bn, c->block_bits); } else { bne = ra->buf[i] + (offset << 9); if (bne->keys.seq != bn->keys.seq) break; sectors = vstruct_sectors(bne, c->block_bits); } prt_printf(&buf, " %u-%u", offset, offset + sectors); if (bne && bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) prt_printf(&buf, "*"); offset += sectors; } while (offset < btree_sectors(c)) { bne = ra->buf[i] + (offset << 9); if (bne->keys.seq == bn->keys.seq) { if (!gap) prt_printf(&buf, " GAP"); gap = true; sectors = vstruct_sectors(bne, c->block_bits); prt_printf(&buf, " %u-%u", offset, offset + sectors); if (bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) prt_printf(&buf, "*"); } offset++; } bch_err(c, "replica %u:%s", i, buf.buf); } } if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); } else { ret = -1; } if (ret) { set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); for (i = 0; i < ra->nr; i++) { mempool_free(ra->buf[i], &c->btree_bounce_pool); bio_put(ra->bio[i]); } closure_debug_destroy(&ra->cl); kfree(ra); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } static void btree_node_read_all_replicas_endio(struct bio *bio) { struct btree_read_bio *rb = container_of(bio, struct btree_read_bio, bio); struct bch_fs *c = rb->c; struct btree_node_read_all *ra = rb->ra; if (rb->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); } ra->err[rb->idx] = bio->bi_status; closure_put(&ra->cl); } /* * XXX This allocates multiple times from the same mempools, and can deadlock * under sufficient memory pressure (but is only a debug path) */ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync) { struct bkey_s_c k = bkey_i_to_s_c(&b->key); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded pick; struct btree_node_read_all *ra; unsigned i; ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) return -BCH_ERR_ENOMEM_btree_node_read_all_replicas; closure_init(&ra->cl, NULL); ra->c = c; ra->b = b; ra->nr = bch2_bkey_nr_ptrs(k); for (i = 0; i < ra->nr; i++) { ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); ra->bio[i] = bio_alloc_bioset(NULL, buf_pages(ra->buf[i], btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); } i = 0; bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) { struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); struct btree_read_bio *rb = container_of(ra->bio[i], struct btree_read_bio, bio); rb->c = c; rb->b = b; rb->ra = ra; rb->start_time = local_clock(); rb->have_ioref = ca != NULL; rb->idx = i; rb->pick = pick; rb->bio.bi_iter.bi_sector = pick.ptr.offset; rb->bio.bi_end_io = btree_node_read_all_replicas_endio; bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b)); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(&rb->bio)); bio_set_dev(&rb->bio, ca->disk_sb.bdev); closure_get(&ra->cl); submit_bio(&rb->bio); } else { ra->err[i] = BLK_STS_REMOVED; } i++; } if (sync) { closure_sync(&ra->cl); btree_node_read_all_replicas_done(&ra->cl.work); } else { continue_at(&ra->cl, btree_node_read_all_replicas_done, c->btree_read_complete_wq); } return 0; } void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bool sync) { struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; struct btree_read_bio *rb; struct bch_dev *ca; struct bio *bio; int ret; trace_and_count(c, btree_node_read, trans, b); if (bch2_verify_all_btree_replicas && !btree_node_read_all_replicas(c, b, sync)) return; ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick); if (ret <= 0) { struct printbuf buf = PRINTBUF; prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); bch_err_ratelimited(c, "%s", buf.buf); if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) bch2_fatal_error(c); set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; } ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); bio = bio_alloc_bioset(NULL, buf_pages(b->data, btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); rb = container_of(bio, struct btree_read_bio, bio); rb->c = c; rb->b = b; rb->ra = NULL; rb->start_time = local_clock(); rb->have_ioref = ca != NULL; rb->pick = pick; INIT_WORK(&rb->work, btree_node_read_work); bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_end_io = btree_node_read_endio; bch2_bio_map(bio, b->data, btree_buf_bytes(b)); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(bio)); bio_set_dev(bio, ca->disk_sb.bdev); if (sync) { submit_bio_wait(bio); bch2_latency_acct(ca, rb->start_time, READ); btree_node_read_work(&rb->work); } else { submit_bio(bio); } } else { bio->bi_status = BLK_STS_REMOVED; if (sync) btree_node_read_work(&rb->work); else queue_work(c->btree_read_complete_wq, &rb->work); } } static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, const struct bkey_i *k, unsigned level) { struct bch_fs *c = trans->c; struct closure cl; struct btree *b; int ret; closure_init_stack(&cl); do { ret = bch2_btree_cache_cannibalize_lock(trans, &cl); closure_sync(&cl); } while (ret); b = bch2_btree_node_mem_alloc(trans, level != 0); bch2_btree_cache_cannibalize_unlock(trans); BUG_ON(IS_ERR(b)); bkey_copy(&b->key, k); BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); set_btree_node_read_in_flight(b); /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); mutex_unlock(&c->btree_cache.lock); ret = -BCH_ERR_btree_node_read_error; goto err; } bch2_btree_set_root_for_read(c, b); err: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); return ret; } int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, struct btree_write *w) { unsigned long old, new; old = READ_ONCE(b->will_make_reachable); do { new = old; if (!(old & 1)) break; new &= ~1UL; } while (!try_cmpxchg(&b->will_make_reachable, &old, new)); if (old & 1) closure_put(&((struct btree_update *) new)->cl); bch2_journal_pin_drop(&c->journal, &w->journal); } static void __btree_node_write_done(struct bch_fs *c, struct btree *b) { struct btree_write *w = btree_prev_write(b); unsigned long old, new; unsigned type = 0; bch2_btree_complete_write(c, b, w); old = READ_ONCE(b->flags); do { new = old; if ((old & (1U << BTREE_NODE_dirty)) && (old & (1U << BTREE_NODE_need_write)) && !(old & (1U << BTREE_NODE_never_write)) && !(old & (1U << BTREE_NODE_write_blocked)) && !(old & (1U << BTREE_NODE_will_make_reachable))) { new &= ~(1U << BTREE_NODE_dirty); new &= ~(1U << BTREE_NODE_need_write); new |= (1U << BTREE_NODE_write_in_flight); new |= (1U << BTREE_NODE_write_in_flight_inner); new |= (1U << BTREE_NODE_just_written); new ^= (1U << BTREE_NODE_write_idx); type = new & BTREE_WRITE_TYPE_MASK; new &= ~BTREE_WRITE_TYPE_MASK; } else { new &= ~(1U << BTREE_NODE_write_in_flight); new &= ~(1U << BTREE_NODE_write_in_flight_inner); } } while (!try_cmpxchg(&b->flags, &old, new)); if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); else wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } static void btree_node_write_done(struct bch_fs *c, struct btree *b) { struct btree_trans *trans = bch2_trans_get(c); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); /* we don't need transaction context anymore after we got the lock. */ bch2_trans_put(trans); __btree_node_write_done(c, b); six_unlock_read(&b->c.lock); } static void btree_node_write_work(struct work_struct *work) { struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work); struct bch_fs *c = wbio->wbio.c; struct btree *b = wbio->wbio.bio.bi_private; int ret = 0; btree_bounce_free(c, wbio->data_bytes, wbio->wbio.used_mempool, wbio->data); bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { ret = -BCH_ERR_btree_node_write_all_failed; goto err; } if (wbio->wbio.first_btree_write) { if (wbio->wbio.failed.nr) { } } else { ret = bch2_trans_do(c, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw, !wbio->wbio.failed.nr)); if (ret) goto err; } out: bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; err: set_btree_node_noevict(b); bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c, "writing btree node: %s", bch2_err_str(ret)); goto out; } static void btree_node_write_endio(struct bio *bio) { struct bch_write_bio *wbio = to_wbio(bio); struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL; struct bch_write_bio *orig = parent ?: wbio; struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio); struct bch_fs *c = wbio->c; struct btree *b = wbio->bio.bi_private; struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL; unsigned long flags; if (wbio->have_ioref) bch2_latency_acct(ca, wbio->submit_time, WRITE); if (!ca || bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, "btree write error: %s", bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("btree")) { spin_lock_irqsave(&c->btree_write_error_lock, flags); bch2_dev_list_add_dev(&orig->failed, wbio->dev); spin_unlock_irqrestore(&c->btree_write_error_lock, flags); } if (wbio->have_ioref) percpu_ref_put(&ca->io_ref); if (parent) { bio_put(bio); bio_endio(&parent->bio); return; } clear_btree_node_write_in_flight_inner(b); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_io_complete_wq, &wb->work); } static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { bool saw_error; int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level + 1, .btree = b->c.btree_id, .flags = BCH_VALIDATE_write, }); if (ret) { bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); if (ret) { bch2_inconsistent_error(c); dump_stack(); } return ret; } static void btree_write_submit(struct work_struct *work) { struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work); BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; bkey_copy(&tmp.k, &wbio->key); bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr) ptr->offset += wbio->sector_offset; bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k, false); } void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) { struct btree_write_bio *wbio; struct bset *i; struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; struct sort_iter_stack sort_iter; struct nonce nonce; unsigned bytes_to_write, sectors_to_write, bytes, u64s; u64 seq = 0; bool used_mempool; unsigned long old, new; bool validate_before_checksum = false; enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK; void *data; int ret; if (flags & BTREE_WRITE_ALREADY_STARTED) goto do_write; /* * We may only have a read lock on the btree node - the dirty bit is our * "lock" against racing with other threads that may be trying to start * a write, we do a write iff we clear the dirty bit. Since setting the * dirty bit requires a write lock, we can't race with other threads * redirtying it: */ old = READ_ONCE(b->flags); do { new = old; if (!(old & (1 << BTREE_NODE_dirty))) return; if ((flags & BTREE_WRITE_ONLY_IF_NEED) && !(old & (1 << BTREE_NODE_need_write))) return; if (old & ((1 << BTREE_NODE_never_write)| (1 << BTREE_NODE_write_blocked))) return; if (b->written && (old & (1 << BTREE_NODE_will_make_reachable))) return; if (old & (1 << BTREE_NODE_write_in_flight)) return; if (flags & BTREE_WRITE_ONLY_IF_NEED) type = new & BTREE_WRITE_TYPE_MASK; new &= ~BTREE_WRITE_TYPE_MASK; new &= ~(1 << BTREE_NODE_dirty); new &= ~(1 << BTREE_NODE_need_write); new |= (1 << BTREE_NODE_write_in_flight); new |= (1 << BTREE_NODE_write_in_flight_inner); new |= (1 << BTREE_NODE_just_written); new ^= (1 << BTREE_NODE_write_idx); } while (!try_cmpxchg_acquire(&b->flags, &old, new)); if (new & (1U << BTREE_NODE_need_write)) return; do_write: BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0)); atomic_long_dec(&c->btree_cache.nr_dirty); BUG_ON(btree_node_fake(b)); BUG_ON((b->will_make_reachable != 0) != !b->written); BUG_ON(b->written >= btree_sectors(c)); BUG_ON(b->written & (block_sectors(c) - 1)); BUG_ON(bset_written(b, btree_bset_last(b))); BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); bch2_sort_whiteouts(c, b); sort_iter_stack_init(&sort_iter, b); bytes = !b->written ? sizeof(struct btree_node) : sizeof(struct btree_node_entry); bytes += b->whiteout_u64s * sizeof(u64); for_each_bset(b, t) { i = bset(b, t); if (bset_written(b, i)) continue; bytes += le16_to_cpu(i->u64s) * sizeof(u64); sort_iter_add(&sort_iter.iter, btree_bkey_first(b, t), btree_bkey_last(b, t)); seq = max(seq, le64_to_cpu(i->journal_seq)); } BUG_ON(b->written && !seq); /* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */ bytes += 8; /* buffer must be a multiple of the block size */ bytes = round_up(bytes, block_bytes(c)); data = btree_bounce_alloc(c, bytes, &used_mempool); if (!b->written) { bn = data; *bn = *b->data; i = &bn->keys; } else { bne = data; bne->keys = b->data->keys; i = &bne->keys; } i->journal_seq = cpu_to_le64(seq); i->u64s = 0; sort_iter_add(&sort_iter.iter, unwritten_whiteouts_start(b), unwritten_whiteouts_end(b)); SET_BSET_SEPARATE_WHITEOUTS(i, false); u64s = bch2_sort_keys_keep_unwritten_whiteouts(i->start, &sort_iter.iter); le16_add_cpu(&i->u64s, u64s); b->whiteout_u64s = 0; BUG_ON(!b->written && i->u64s != b->data->keys.u64s); set_needs_whiteout(i, false); /* do we have data to write? */ if (b->written && !i->u64s) goto nowrite; bytes_to_write = vstruct_end(i) - data; sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; if (!b->written && b->key.k.type == KEY_TYPE_btree_ptr_v2) BUG_ON(btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) != sectors_to_write); memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); BUG_ON(b->written + sectors_to_write > btree_sectors(c)); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); i->version = cpu_to_le16(c->sb.version); SET_BSET_OFFSET(i, b->written); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) validate_before_checksum = true; /* validate_bset will be modifying: */ if (le16_to_cpu(i->version) < bcachefs_metadata_version_current) validate_before_checksum = true; /* if we're going to be encrypting, check metadata validity first: */ if (validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "encrypting btree node: %s", bch2_err_str(ret))) goto err; nonce = btree_nonce(i, b->written << 9); if (bn) bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); else bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); /* if we're not encrypting, check metadata after checksumming: */ if (!validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; /* * We handle btree write errors by immediately halting the journal - * after we've done that, we can't issue any subsequent btree writes * because they might have pointers to new nodes that failed to write. * * Furthermore, there's no point in doing any more btree writes because * with the journal stopped, we're never going to update the journal to * reflect that those writes were done and the data flushed from the * journal: * * Also on journal error, the pending write may have updates that were * never journalled (interior nodes, see btree_update_nodes_written()) - * it's critical that we don't do the write in that case otherwise we * will have updates visible that weren't in the journal: * * Make sure to update b->written so bch2_btree_init_next() doesn't * break: */ if (bch2_journal_error(&c->journal) || c->opts.nochanges) goto err; trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write); wbio = container_of(bio_alloc_bioset(NULL, buf_pages(data, sectors_to_write << 9), REQ_OP_WRITE|REQ_META, GFP_NOFS, &c->btree_bio), struct btree_write_bio, wbio.bio); wbio_init(&wbio->wbio.bio); wbio->data = data; wbio->data_bytes = bytes; wbio->sector_offset = b->written; wbio->wbio.c = c; wbio->wbio.used_mempool = used_mempool; wbio->wbio.first_btree_write = !b->written; wbio->wbio.bio.bi_end_io = btree_node_write_endio; wbio->wbio.bio.bi_private = b; bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); bkey_copy(&wbio->key, &b->key); b->written += sectors_to_write; if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2) bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written = cpu_to_le16(b->written); atomic64_inc(&c->btree_write_stats[type].nr); atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); INIT_WORK(&wbio->work, btree_write_submit); queue_work(c->btree_write_submit_wq, &wbio->work); return; err: set_btree_node_noevict(b); b->written += sectors_to_write; nowrite: btree_bounce_free(c, bytes, used_mempool, data); __btree_node_write_done(c, b); } /* * Work that must be done with write lock held: */ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) { bool invalidated_iter = false; struct btree_node_entry *bne; if (!btree_node_just_written(b)) return false; BUG_ON(b->whiteout_u64s); clear_btree_node_just_written(b); /* * Note: immediately after write, bset_written() doesn't work - the * amount of data we had to write after compaction might have been * smaller than the offset of the last bset. * * However, we know that all bsets have been written here, as long as * we're still holding the write lock: */ /* * XXX: decide if we really want to unconditionally sort down to a * single bset: */ if (b->nsets > 1) { btree_node_sort(c, b, 0, b->nsets); invalidated_iter = true; } else { invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); } for_each_bset(b, t) set_needs_whiteout(bset(b, t), true); bch2_btree_verify(c, b); /* * If later we don't unconditionally sort down to a single bset, we have * to ensure this is still true: */ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); bne = want_new_bset(c, b); if (bne) bch2_bset_init_next(b, bne); bch2_btree_build_aux_trees(b); return invalidated_iter; } /* * Use this one if the node is intent locked: */ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, enum six_lock_type lock_type_held, unsigned flags) { if (lock_type_held == SIX_LOCK_intent || (lock_type_held == SIX_LOCK_read && six_lock_tryupgrade(&b->c.lock))) { __bch2_btree_node_write(c, b, flags); /* don't cycle lock unnecessarily: */ if (btree_node_just_written(b) && six_trylock_write(&b->c.lock)) { bch2_btree_post_write_cleanup(c, b); six_unlock_write(&b->c.lock); } if (lock_type_held == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } else { __bch2_btree_node_write(c, b, flags); if (lock_type_held == SIX_LOCK_write && btree_node_just_written(b)) bch2_btree_post_write_cleanup(c, b); } } void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b, enum six_lock_type lock_type_held, unsigned flags) { struct bch_fs *c = trans->c; if (lock_type_held == SIX_LOCK_intent || (lock_type_held == SIX_LOCK_read && six_lock_tryupgrade(&b->c.lock))) { __bch2_btree_node_write(c, b, flags); /* don't cycle lock unnecessarily: */ if (btree_node_just_written(b) && six_trylock_write(&b->c.lock)) { bch2_btree_post_write_cleanup(c, b); __bch2_btree_node_unlock_write(trans, b); } if (lock_type_held == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } else { __bch2_btree_node_write(c, b, flags); if (lock_type_held == SIX_LOCK_write && btree_node_just_written(b)) bch2_btree_post_write_cleanup(c, b); } } static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) { struct bucket_table *tbl; struct rhash_head *pos; struct btree *b; unsigned i; bool ret = false; restart: rcu_read_lock(); for_each_cached_btree(b, c, tbl, i, pos) if (test_bit(flag, &b->flags)) { rcu_read_unlock(); wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); ret = true; goto restart; } rcu_read_unlock(); return ret; } bool bch2_btree_flush_all_reads(struct bch_fs *c) { return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); } bool bch2_btree_flush_all_writes(struct bch_fs *c) { return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); } static const char * const bch2_btree_write_types[] = { #define x(t, n) [n] = #t, BCH_BTREE_WRITE_TYPES() NULL }; void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c) { printbuf_tabstop_push(out, 20); printbuf_tabstop_push(out, 10); prt_printf(out, "\tnr\tsize\n"); for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) { u64 nr = atomic64_read(&c->btree_write_stats[i].nr); u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes); prt_printf(out, "%s:\t%llu\t", bch2_btree_write_types[i], nr); prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0); prt_newline(out); } }
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 // SPDX-License-Identifier: GPL-2.0-only /* DVB USB framework compliant Linux driver for the Hauppauge WinTV-NOVA-T usb2 * DVB-T receiver. * * Copyright (C) 2004-5 Patrick Boettcher (patrick.boettcher@posteo.de) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dibusb.h" static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=rc,2=eeprom (|-able))." DVB_USB_DEBUG_STATUS); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); #define deb_rc(args...) dprintk(debug,0x01,args) #define deb_ee(args...) dprintk(debug,0x02,args) /* Hauppauge NOVA-T USB2 keys */ static struct rc_map_table rc_map_haupp_table[] = { { 0x1e00, KEY_0 }, { 0x1e01, KEY_1 }, { 0x1e02, KEY_2 }, { 0x1e03, KEY_3 }, { 0x1e04, KEY_4 }, { 0x1e05, KEY_5 }, { 0x1e06, KEY_6 }, { 0x1e07, KEY_7 }, { 0x1e08, KEY_8 }, { 0x1e09, KEY_9 }, { 0x1e0a, KEY_KPASTERISK }, { 0x1e0b, KEY_RED }, { 0x1e0c, KEY_RADIO }, { 0x1e0d, KEY_MENU }, { 0x1e0e, KEY_GRAVE }, /* # */ { 0x1e0f, KEY_MUTE }, { 0x1e10, KEY_VOLUMEUP }, { 0x1e11, KEY_VOLUMEDOWN }, { 0x1e12, KEY_CHANNEL }, { 0x1e14, KEY_UP }, { 0x1e15, KEY_DOWN }, { 0x1e16, KEY_LEFT }, { 0x1e17, KEY_RIGHT }, { 0x1e18, KEY_VIDEO }, { 0x1e19, KEY_AUDIO }, { 0x1e1a, KEY_IMAGES }, { 0x1e1b, KEY_EPG }, { 0x1e1c, KEY_TV }, { 0x1e1e, KEY_NEXT }, { 0x1e1f, KEY_BACK }, { 0x1e20, KEY_CHANNELUP }, { 0x1e21, KEY_CHANNELDOWN }, { 0x1e24, KEY_LAST }, /* Skip backwards */ { 0x1e25, KEY_OK }, { 0x1e29, KEY_BLUE}, { 0x1e2e, KEY_GREEN }, { 0x1e30, KEY_PAUSE }, { 0x1e32, KEY_REWIND }, { 0x1e34, KEY_FASTFORWARD }, { 0x1e35, KEY_PLAY }, { 0x1e36, KEY_STOP }, { 0x1e37, KEY_RECORD }, { 0x1e38, KEY_YELLOW }, { 0x1e3b, KEY_GOTO }, { 0x1e3d, KEY_POWER }, }; /* Firmware bug? sometimes, when a new key is pressed, the previous pressed key * is delivered. No workaround yet, maybe a new firmware. */ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { u8 *buf, data, toggle, custom; u16 raw; int i, ret; struct dibusb_device_state *st = d->priv; buf = kmalloc(5, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = DIBUSB_REQ_POLL_REMOTE; buf[1] = 0x35; ret = dvb_usb_generic_rw(d, buf, 2, buf, 5, 0); if (ret < 0) goto ret; *state = REMOTE_NO_KEY_PRESSED; switch (buf[0]) { case DIBUSB_RC_HAUPPAUGE_KEY_PRESSED: raw = ((buf[1] << 8) | buf[2]) >> 3; toggle = !!(raw & 0x800); data = raw & 0x3f; custom = (raw >> 6) & 0x1f; deb_rc("raw key code 0x%02x, 0x%02x, 0x%02x to c: %02x d: %02x toggle: %d\n", buf[1], buf[2], buf[3], custom, data, toggle); for (i = 0; i < ARRAY_SIZE(rc_map_haupp_table); i++) { if (rc5_data(&rc_map_haupp_table[i]) == data && rc5_custom(&rc_map_haupp_table[i]) == custom) { deb_rc("c: %x, d: %x\n", rc5_data(&rc_map_haupp_table[i]), rc5_custom(&rc_map_haupp_table[i])); *event = rc_map_haupp_table[i].keycode; *state = REMOTE_KEY_PRESSED; if (st->old_toggle == toggle) { if (st->last_repeat_count++ < 2) *state = REMOTE_NO_KEY_PRESSED; } else { st->last_repeat_count = 0; st->old_toggle = toggle; } break; } } break; case DIBUSB_RC_HAUPPAUGE_KEY_EMPTY: default: break; } ret: kfree(buf); return ret; } static int nova_t_read_mac_address (struct dvb_usb_device *d, u8 mac[6]) { int i, ret; u8 b; mac[0] = 0x00; mac[1] = 0x0d; mac[2] = 0xfe; /* this is a complete guess, but works for my box */ for (i = 136; i < 139; i++) { ret = dibusb_read_eeprom_byte(d, i, &b); if (ret) return ret; mac[5 - (i - 136)] = b; } return 0; } /* USB Driver stuff */ static struct dvb_usb_device_properties nova_t_properties; static int nova_t_probe(struct usb_interface *intf, const struct usb_device_id *id) { return dvb_usb_device_init(intf, &nova_t_properties, THIS_MODULE, NULL, adapter_nr); } /* do not change the order of the ID table */ enum { HAUPPAUGE_WINTV_NOVA_T_USB2_COLD, HAUPPAUGE_WINTV_NOVA_T_USB2_WARM, }; static struct usb_device_id nova_t_table[] = { DVB_USB_DEV(HAUPPAUGE, HAUPPAUGE_WINTV_NOVA_T_USB2_COLD), DVB_USB_DEV(HAUPPAUGE, HAUPPAUGE_WINTV_NOVA_T_USB2_WARM), { } }; MODULE_DEVICE_TABLE(usb, nova_t_table); static struct dvb_usb_device_properties nova_t_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-nova-t-usb2-02.fw", .num_adapters = 1, .adapter = { { .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 32, .streaming_ctrl = dibusb2_0_streaming_ctrl, .pid_filter = dibusb_pid_filter, .pid_filter_ctrl = dibusb_pid_filter_ctrl, .frontend_attach = dibusb_dib3000mc_frontend_attach, .tuner_attach = dibusb_dib3000mc_tuner_attach, /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_BULK, .count = 7, .endpoint = 0x06, .u = { .bulk = { .buffersize = 4096, } } }, }}, .size_of_priv = sizeof(struct dibusb_state), } }, .size_of_priv = sizeof(struct dibusb_device_state), .power_ctrl = dibusb2_0_power_ctrl, .read_mac_address = nova_t_read_mac_address, .rc.legacy = { .rc_interval = 100, .rc_map_table = rc_map_haupp_table, .rc_map_size = ARRAY_SIZE(rc_map_haupp_table), .rc_query = nova_t_rc_query, }, .i2c_algo = &dibusb_i2c_algo, .generic_bulk_ctrl_endpoint = 0x01, .num_device_descs = 1, .devices = { { "Hauppauge WinTV-NOVA-T usb2", { &nova_t_table[HAUPPAUGE_WINTV_NOVA_T_USB2_COLD], NULL }, { &nova_t_table[HAUPPAUGE_WINTV_NOVA_T_USB2_WARM], NULL }, }, { NULL }, } }; static struct usb_driver nova_t_driver = { .name = "dvb_usb_nova_t_usb2", .probe = nova_t_probe, .disconnect = dvb_usb_device_exit, .id_table = nova_t_table, }; module_usb_driver(nova_t_driver); MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>"); MODULE_DESCRIPTION("Hauppauge WinTV-NOVA-T usb2"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 /* SPDX-License-Identifier: GPL-2.0 * * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT. * Do not use in new code. */ #ifndef _BLK_CGROUP_RWSTAT_H #define _BLK_CGROUP_RWSTAT_H #include "blk-cgroup.h" enum blkg_rwstat_type { BLKG_RWSTAT_READ, BLKG_RWSTAT_WRITE, BLKG_RWSTAT_SYNC, BLKG_RWSTAT_ASYNC, BLKG_RWSTAT_DISCARD, BLKG_RWSTAT_NR, BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, }; /* * blkg_[rw]stat->aux_cnt is excluded for local stats but included for * recursive. Used to carry stats of dead children. */ struct blkg_rwstat { struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; atomic64_t aux_cnt[BLKG_RWSTAT_NR]; }; struct blkg_rwstat_sample { u64 cnt[BLKG_RWSTAT_NR]; }; static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, unsigned int idx) { return atomic64_read(&rwstat->aux_cnt[idx]) + percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); } int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp); void blkg_rwstat_exit(struct blkg_rwstat *rwstat); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat_sample *rwstat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off, struct blkg_rwstat_sample *sum); /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat * @opf: REQ_OP and flags * @val: value to add * * Add @val to @rwstat. The counters are chosen according to @rw. The * caller is responsible for synchronizing calls to this function. */ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, blk_opf_t opf, uint64_t val) { struct percpu_counter *cnt; if (op_is_discard(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; else if (op_is_write(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); if (op_is_sync(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); } /** * blkg_rwstat_read - read the current values of a blkg_rwstat * @rwstat: blkg_rwstat to read * @result: where to put the current values * * Read the current snapshot of @rwstat and return it in the @result counts. */ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, struct blkg_rwstat_sample *result) { int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) result->cnt[i] = percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); } /** * blkg_rwstat_total - read the total count of a blkg_rwstat * @rwstat: blkg_rwstat to read * * Return the total count of @rwstat regardless of the IO direction. This * function can be called without synchronization and takes care of u64 * atomicity. */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { struct blkg_rwstat_sample tmp = { }; blkg_rwstat_read(rwstat, &tmp); return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; } /** * blkg_rwstat_reset - reset a blkg_rwstat * @rwstat: blkg_rwstat to reset */ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) { int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) { percpu_counter_set(&rwstat->cpu_cnt[i], 0); atomic64_set(&rwstat->aux_cnt[i], 0); } } /** * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count * @to: the destination blkg_rwstat * @from: the source * * Add @from's count including the aux one to @to's aux count. */ static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, struct blkg_rwstat *from) { u64 sum[BLKG_RWSTAT_NR]; int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]); for (i = 0; i < BLKG_RWSTAT_NR; i++) atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]), &to->aux_cnt[i]); } #endif /* _BLK_CGROUP_RWSTAT_H */
122 144 66 143 144 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COOKIE_H #define __LINUX_COOKIE_H #include <linux/atomic.h> #include <linux/percpu.h> #include <asm/local.h> struct pcpu_gen_cookie { local_t nesting; u64 last; } __aligned(16); struct gen_cookie { struct pcpu_gen_cookie __percpu *local; atomic64_t forward_last ____cacheline_aligned_in_smp; atomic64_t reverse_last; }; #define COOKIE_LOCAL_BATCH 4096 #define DEFINE_COOKIE(name) \ static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name); \ static struct gen_cookie name = { \ .local = &__##name, \ .forward_last = ATOMIC64_INIT(0), \ .reverse_last = ATOMIC64_INIT(0), \ } static __always_inline u64 gen_cookie_next(struct gen_cookie *gc) { struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local); u64 val; if (likely(local_inc_return(&local->nesting) == 1)) { val = local->last; if (__is_defined(CONFIG_SMP) && unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, &gc->forward_last); val = next - COOKIE_LOCAL_BATCH; } local->last = ++val; } else { val = atomic64_dec_return(&gc->reverse_last); } local_dec(&local->nesting); return val; } #endif /* __LINUX_COOKIE_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 // SPDX-License-Identifier: GPL-2.0-or-later /* * v4l2-spi - SPI helpers for Video4Linux2 */ #include <linux/module.h> #include <linux/spi/spi.h> #include <media/v4l2-common.h> #include <media/v4l2-device.h> void v4l2_spi_subdev_unregister(struct v4l2_subdev *sd) { struct spi_device *spi = v4l2_get_subdevdata(sd); if (spi && !spi->dev.of_node && !spi->dev.fwnode) spi_unregister_device(spi); } void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi, const struct v4l2_subdev_ops *ops) { v4l2_subdev_init(sd, ops); sd->flags |= V4L2_SUBDEV_FL_IS_SPI; /* the owner is the same as the spi_device's driver owner */ sd->owner = spi->dev.driver->owner; sd->dev = &spi->dev; /* spi_device and v4l2_subdev point to one another */ v4l2_set_subdevdata(sd, spi); spi_set_drvdata(spi, sd); /* initialize name */ snprintf(sd->name, sizeof(sd->name), "%s %s", spi->dev.driver->name, dev_name(&spi->dev)); } EXPORT_SYMBOL_GPL(v4l2_spi_subdev_init); struct v4l2_subdev *v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev, struct spi_controller *ctlr, struct spi_board_info *info) { struct v4l2_subdev *sd = NULL; struct spi_device *spi = NULL; if (!v4l2_dev) return NULL; if (info->modalias[0]) request_module(info->modalias); spi = spi_new_device(ctlr, info); if (!spi || !spi->dev.driver) goto error; if (!try_module_get(spi->dev.driver->owner)) goto error; sd = spi_get_drvdata(spi); /* * Register with the v4l2_device which increases the module's * use count as well. */ if (__v4l2_device_register_subdev(v4l2_dev, sd, sd->owner)) sd = NULL; /* Decrease the module use count to match the first try_module_get. */ module_put(spi->dev.driver->owner); error: /* * If we have a client but no subdev, then something went wrong and * we must unregister the client. */ if (!sd) spi_unregister_device(spi); return sd; } EXPORT_SYMBOL_GPL(v4l2_spi_new_subdev);
7 7 7 1 6 2 4 7 10 4 3 1 4 3 2 10 10 10 5 1 9 4 1 7 5 2 3 3 3 2 2 5 4 3 3 14 14 12 12 3 12 12 12 12 14 2 14 5 9 14 21 14 20 19 5 14 2 4 21 86 88 3 88 1 1 40 40 1 38 38 27 38 37 36 6 1 1 37 36 2 40 31 30 30 30 30 4 30 30 30 30 1 31 24 23 23 23 23 1 24 4 4 3 1 4 4 4 3 1 1 4 3 8 8 8 8 8 8 7 5 5 2 2 5 5 5 5 8 25 25 1 25 24 25 9 25 3 4 2 1 1 1 1 1 4 1 7 1 1 7 32 43 39 4 39 29 8 7 2 1 21 20 29 20 26 13 52 49 5 44 7 10 47 15 2 39 2 2 38 37 8 37 19 19 18 17 7 14 1 13 2 14 12 2 11 1 42 42 40 4 40 2 40 4 40 1 40 2 40 3 40 13 40 4 40 1 40 39 38 40 13 2 40 4 3 2 38 1 12 11 11 11 10 5 9 6 1 3 2 1 2 1 43 11 43 4 39 2 41 3 34 27 27 27 39 44 43 16 45 28 28 27 26 11 11 10 8 3 3 3 3 2 3 3 2 2 1 100 116 115 116 114 116 115 115 115 116 116 115 116 116 116 115 116 115 116 116 10 72 72 66 72 72 69 3 2 2 72 31 20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: GRE over IP protocol decoder. * * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/ip_tunnels.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/gre.h> #include <net/dst_metadata.h> #include <net/erspan.h> #include <net/inet_dscp.h> /* Problems & solutions -------------------- 1. The most important issue is detecting local dead loops. They would cause complete host lockup in transmit, which would be "resolved" by stack overflow or, if queueing is enabled, with infinite looping in net_bh. We cannot track such dead loops during route installation, it is infeasible task. The most general solutions would be to keep skb->encapsulation counter (sort of local ttl), and silently drop packet when it expires. It is a good solution, but it supposes maintaining new variable in ALL skb, even if no tunneling is used. Current solution: xmit_recursion breaks dead loops. This is a percpu counter, since when we enter the first ndo_xmit(), cpu migration is forbidden. We force an exit if this counter reaches RECURSION_LIMIT 2. Networking dead loops would not kill routers, but would really kill network. IP hop limit plays role of "t->recursion" in this case, if we copy it from packet being encapsulated to upper header. It is very good solution, but it introduces two problems: - Routing protocols, using packets with ttl=1 (OSPF, RIP2), do not work over tunnels. - traceroute does not work. I planned to relay ICMP from tunnel, so that this problem would be solved and traceroute output would even more informative. This idea appeared to be wrong: only Linux complies to rfc1812 now (yes, guys, Linux is the only true router now :-)), all routers (at least, in neighbourhood of mine) return only 8 bytes of payload. It is the end. Hence, if we want that OSPF worked or traceroute said something reasonable, we should search for another solution. One of them is to parse packet trying to detect inner encapsulation made by our node. It is difficult or even impossible, especially, taking into account fragmentation. TO be short, ttl is not solution at all. Current solution: The solution was UNEXPECTEDLY SIMPLE. We force DF flag on tunnels with preconfigured hop limit, that is ALL. :-) Well, it does not remove the problem completely, but exponential growth of network traffic is changed to linear (branches, that exceed pmtu are pruned) and tunnel mtu rapidly degrades to value <68, where looping stops. Yes, it is not good if there exists a router in the loop, which does not force DF, even when encapsulating packets have DF set. But it is not our problem! Nobody could accuse us, we made all that we could make. Even if it is your gated who injected fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) Alexey Kuznetsov. */ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static const struct header_ops ipgre_header_ops; static int ipgre_tunnel_init(struct net_device *dev); static void erspan_build_header(struct sk_buff *skb, u32 id, u32 index, bool truncate, bool is_ipv4); static unsigned int ipgre_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly; static unsigned int erspan_net_id __read_mostly; static int ipgre_err(struct sk_buff *skb, u32 info, const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. Moreover, Cisco "wise men" put GRE key to the third word in GRE header. It makes impossible maintaining even soft state for keyed GRE tunnels with enabled checksum. Tell them "thank you". Well, I wonder, rfc1812 was written by Cisco employee, what the hell these idiots break standards established by themselves??? */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; unsigned int data_len = 0; struct ip_tunnel *t; if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else if (tpi->proto == htons(ETH_P_ERSPAN) || tpi->proto == htons(ETH_P_ERSPAN2)) itn = net_generic(net, erspan_net_id); else itn = net_generic(net, ipgre_net_id); iph = (const struct iphdr *)(icmp_hdr(skb) + 1); t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->daddr, iph->saddr, tpi->key); if (!t) return -ENOENT; switch (type) { default: case ICMP_PARAMETERPROB: return 0; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, I believe they are just ether pollution. --ANK */ break; } break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: break; } #if IS_ENABLED(CONFIG_IPV6) if (tpi->proto == htons(ETH_P_IPV6) && !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, type, data_len)) return 0; #endif if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) return 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) return 0; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; return 0; } static void gre_err(struct sk_buff *skb, u32 info) { /* All the routers (except for Linux) return only * 8 bytes of packet payload. It means, that precise relaying of * ICMP in the real Internet is absolutely infeasible. * * Moreover, Cisco "wise men" put GRE key to the third word * in GRE header. It makes impossible maintaining even soft * state for keyed * GRE tunnels with enabled checksum. Tell them "thank you". * * Well, I wonder, rfc1812 was written by Cisco employee, * what the hell these idiots break standards established * by themselves??? */ const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), iph->ihl * 4) < 0) return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, skb->dev->ifindex, IPPROTO_GRE); return; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, IPPROTO_GRE); return; } ipgre_err(skb, info, &tpi); } static bool is_erspan_type1(int gre_hdr_len) { /* Both ERSPAN type I (version 0) and type II (version 1) use * protocol 0x88BE, but the type I has only 4-byte GRE header, * while type II has 8-byte. */ return gre_hdr_len == 4; } static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int gre_hdr_len) { struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; IP_TUNNEL_DECLARE_FLAGS(flags); struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; struct erspan_md2 *md2; int ver; int len; ip_tunnel_flags_copy(flags, tpi->flags); itn = net_generic(net, erspan_net_id); iph = ip_hdr(skb); if (is_erspan_type1(gre_hdr_len)) { ver = 0; __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, 0); } else { if (unlikely(!pskb_may_pull(skb, gre_hdr_len + sizeof(*ershdr)))) return PACKET_REJECT; ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; iph = ip_hdr(skb); __set_bit(IP_TUNNEL_KEY_BIT, flags); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, tpi->key); } if (tunnel) { if (is_erspan_type1(gre_hdr_len)) len = gre_hdr_len; else len = gre_hdr_len + erspan_hdr_len(ver); if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) goto drop; if (tunnel->collect_md) { struct erspan_metadata *pkt_md, *md; struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, sizeof(*md)); if (!tun_dst) return PACKET_REJECT; /* skb can be uncloned in __iptunnel_pull_header, so * old pkt_md is no longer valid and we need to reset * it */ gh = skb_network_header(skb) + skb_network_header_len(skb); pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + sizeof(*ershdr)); md = ip_tunnel_info_opts(&tun_dst->u.tun_info); md->version = ver; md2 = &md->u.md2; memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); info = &tun_dst->u.tun_info; __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); info->options_len = sizeof(*md); } skb_reset_mac_header(skb); ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } return PACKET_REJECT; drop: kfree_skb(skb); return PACKET_RCVD; } static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { struct metadata_dst *tun_dst = NULL; const struct iphdr *iph; struct ip_tunnel *tunnel; iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->saddr, iph->daddr, tpi->key); if (tunnel) { const struct iphdr *tnl_params; if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, raw_proto, false) < 0) goto drop; /* Special case for ipgre_header_parse(), which expects the * mac_header to point to the outer IP header. */ if (tunnel->dev->header_ops == &ipgre_header_ops) skb_pop_mac_header(skb); else skb_reset_mac_header(skb); tnl_params = &tunnel->parms.iph; if (tunnel->collect_md || tnl_params->daddr == 0) { IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __be64 tun_id; __set_bit(IP_TUNNEL_CSUM_BIT, flags); __set_bit(IP_TUNNEL_KEY_BIT, flags); ip_tunnel_flags_and(flags, tpi->flags, flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; } ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } return PACKET_NEXT; drop: kfree_skb(skb); return PACKET_RCVD; } static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; int res; if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { /* ipgre tunnels in collect metadata mode should receive * also ETH_P_TEB traffic. */ itn = net_generic(net, ipgre_net_id); res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); } return res; } static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { /* Looped back packet, drop it! */ if (rt_is_output_route(skb_rtable(skb))) goto drop; } #endif hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; } static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); IP_TUNNEL_DECLARE_FLAGS(flags); ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, test_bit(IP_TUNNEL_SEQ_BIT, flags) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; int tunnel_hlen; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || ip_tunnel_info_af(tun_info) != AF_INET)) goto err_free_skb; key = &tun_info->key; tunnel_hlen = gre_calc_hlen(key->tun_flags); if (skb_cow_head(skb, dev->needed_headroom)) goto err_free_skb; /* Push Tunnel header. */ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags))) goto err_free_skb; __set_bit(IP_TUNNEL_CSUM_BIT, flags); __set_bit(IP_TUNNEL_KEY_BIT, flags); __set_bit(IP_TUNNEL_SEQ_BIT, flags); ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), test_bit(IP_TUNNEL_SEQ_BIT, flags) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); return; err_free_skb: kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); } static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; bool truncate = false; __be16 proto; int tunnel_hlen; int version; int nhoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || ip_tunnel_info_af(tun_info) != AF_INET)) goto err_free_skb; key = &tun_info->key; if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) goto err_free_skb; if (tun_info->options_len < sizeof(*md)) goto err_free_skb; md = ip_tunnel_info_opts(tun_info); /* ERSPAN has fixed 8 byte GRE header */ version = md->version; tunnel_hlen = 8 + erspan_hdr_len(version); if (skb_cow_head(skb, dev->needed_headroom)) goto err_free_skb; if (gre_handle_offloads(skb, false)) goto err_free_skb; if (skb->len > dev->mtu + dev->hard_header_len) { if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) goto err_free_skb; truncate = true; } nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; if (skb->protocol == htons(ETH_P_IPV6)) { int thoff; if (skb_transport_header_was_set(skb)) thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) truncate = true; } if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); proto = htons(ETH_P_ERSPAN); } else if (version == 2) { erspan_build_header_v2(skb, ntohl(tunnel_id_to_key32(key->tun_id)), md->u.md2.dir, get_hwid(&md->u.md2), truncate, true); proto = htons(ETH_P_ERSPAN2); } else { goto err_free_skb; } __set_bit(IP_TUNNEL_SEQ_BIT, flags); gre_build_header(skb, 8, flags, proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); return; err_free_skb: kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); } static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct ip_tunnel_key *key; struct rtable *rt; struct flowi4 fl4; if (ip_tunnel_info_af(info) != AF_INET) return -EINVAL; key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos & ~INET_ECN_MASK, dev_net(dev), 0, skb->mark, skb_get_hash(skb), key->flow_flags); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); info->key.u.ipv4.src = fl4.saddr; return 0; } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; } if (dev->header_ops) { int pull_len = tunnel->hlen + sizeof(struct iphdr); if (skb_cow_head(skb, 0)) goto free_skb; if (!pskb_may_pull(skb, pull_len)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start(skb) < skb->data) goto free_skb; } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; tnl_params = &tunnel->parms.iph; } if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags))) goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; free_skb: kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; __be16 proto; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { erspan_fb_xmit(skb, dev); return NETDEV_TX_OK; } if (gre_handle_offloads(skb, false)) goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; if (skb->len > dev->mtu + dev->hard_header_len) { if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) goto free_skb; truncate = true; } /* Push ERSPAN header */ if (tunnel->erspan_ver == 0) { proto = htons(ETH_P_ERSPAN); __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags); } else if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); proto = htons(ETH_P_ERSPAN); } else if (tunnel->erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); proto = htons(ETH_P_ERSPAN2); } else { goto free_skb; } __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags); __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; free_skb: kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; } if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags))) goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); return NETDEV_TX_OK; free_skb: kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } static void ipgre_link_update(struct net_device *dev, bool set_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); int len; len = tunnel->tun_hlen; tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); len = tunnel->tun_hlen - len; tunnel->hlen = tunnel->hlen + len; if (dev->header_ops) dev->hard_header_len += len; else dev->needed_headroom += len; if (set_mtu) WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68)); if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) || (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { dev->features &= ~NETIF_F_GSO_SOFTWARE; dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; } else { dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; } } static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { __be16 i_flags, o_flags; int err; if (!ip_tunnel_flags_is_be16_compat(p->i_flags) || !ip_tunnel_flags_is_be16_compat(p->o_flags)) return -EOVERFLOW; i_flags = ip_tunnel_flags_to_be16(p->i_flags); o_flags = ip_tunnel_flags_to_be16(p->o_flags); if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } gre_flags_to_tnl_flags(p->i_flags, i_flags); gre_flags_to_tnl_flags(p->o_flags, o_flags); err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); ip_tunnel_flags_from_be16(p->i_flags, i_flags); o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); ip_tunnel_flags_from_be16(p->o_flags, o_flags); return 0; } /* Nice toy. Unfortunately, useless in real life :-) It allows to construct virtual multiprotocol broadcast "LAN" over the Internet, provided multicast routing is tuned. I have no idea was this bicycle invented before me, so that I had to set ARPHRD_IPGRE to a random value. I have an impression, that Cisco could make something similar, but this feature is apparently missing in IOS<=11.2(8). I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks with broadcast 224.66.66.66. If you have access to mbone, play with me :-) ping -t 255 224.66.66.66 If nobody answers, mbone does not work. ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 ip addr add 10.66.66.<somewhat>/24 dev Universe ifconfig Universe up ifconfig Universe add fe80::<Your_real_addr>/10 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 ftp 10.66.66.66 ... ftp fec0:6666:6666::193.233.7.65 ... */ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph; struct gre_base_hdr *greh; iph = skb_push(skb, t->hlen + sizeof(*iph)); greh = (struct gre_base_hdr *)(iph+1); greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); greh->protocol = htons(type); memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); /* Set the source hardware address. */ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) { const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); return 4; } static const struct header_ops ipgre_header_ops = { .create = ipgre_header, .parse = ipgre_header_parse, }; #ifdef CONFIG_NET_IPGRE_BROADCAST static int ipgre_open(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi4 fl4 = { .flowi4_oif = t->parms.link, .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)), .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_proto = IPPROTO_GRE, .saddr = t->parms.iph.saddr, .daddr = t->parms.iph.daddr, .fl4_gre_key = t->parms.o_key, }; struct rtable *rt; rt = ip_route_output_key(t->net, &fl4); if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt->dst.dev; ip_rt_put(rt); if (!__in_dev_get_rtnl(dev)) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } static int ipgre_close(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; in_dev = inetdev_by_index(t->net, t->mlink); if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); } return 0; } #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ NETIF_F_HW_CSUM) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; dev->type = ARPHRD_IPGRE; ip_tunnel_setup(dev, ipgre_net_id); } static void __gre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; tunnel = netdev_priv(dev); tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph); dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. */ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags)) return; if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && tunnel->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->lltx = true; } static int ipgre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; __gre_tunnel_init(dev); __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; if (iph->daddr && !tunnel->collect_md) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { if (!iph->saddr) return -EINVAL; dev->flags = IFF_BROADCAST; dev->header_ops = &ipgre_header_ops; dev->hard_header_len = tunnel->hlen + sizeof(*iph); dev->needed_headroom = 0; } #endif } else if (!tunnel->collect_md) { dev->header_ops = &ipgre_header_ops; dev->hard_header_len = tunnel->hlen + sizeof(*iph); dev->needed_headroom = 0; } return ip_tunnel_init(dev); } static const struct gre_protocol ipgre_protocol = { .handler = gre_rcv, .err_handler = gre_err, }; static int __net_init ipgre_init_net(struct net *net) { return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net, struct list_head *dev_to_kill) { ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops, dev_to_kill); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit_batch_rtnl = ipgre_exit_batch_rtnl, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { __be16 flags; if (!data) return 0; flags = 0; if (data[IFLA_GRE_IFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); if (data[IFLA_GRE_OFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; if (data[IFLA_GRE_COLLECT_METADATA] && data[IFLA_GRE_ENCAP_TYPE] && nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) return -EINVAL; return 0; } static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { __be32 daddr; if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } if (!data) goto out; if (data[IFLA_GRE_REMOTE]) { memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); if (!daddr) return -EINVAL; } out: return ipgre_tunnel_validate(tb, data, extack); } static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { __be16 flags = 0; int ret; if (!data) return 0; ret = ipgre_tap_validate(tb, data, extack); if (ret) return ret; if (data[IFLA_GRE_ERSPAN_VER] && nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0) return 0; /* ERSPAN type II/III should only have GRE sequence and key flag */ if (data[IFLA_GRE_OFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (data[IFLA_GRE_IFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); if (!data[IFLA_GRE_COLLECT_METADATA] && flags != (GRE_SEQ | GRE_KEY)) return -EINVAL; /* ERSPAN Session ID only has 10-bit. Since we reuse * 32-bit key field as ID, check it's range. */ if (data[IFLA_GRE_IKEY] && (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) return -EINVAL; if (data[IFLA_GRE_OKEY] && (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) return -EINVAL; return 0; } static int ipgre_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { struct ip_tunnel *t = netdev_priv(dev); memset(parms, 0, sizeof(*parms)); parms->iph.protocol = IPPROTO_GRE; if (!data) return 0; if (data[IFLA_GRE_LINK]) parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) gre_flags_to_tnl_flags(parms->i_flags, nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) gre_flags_to_tnl_flags(parms->o_flags, nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); if (data[IFLA_GRE_OKEY]) parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); if (data[IFLA_GRE_TOS]) parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) { if (t->ignore_df) return -EINVAL; parms->iph.frag_off = htons(IP_DF); } if (data[IFLA_GRE_COLLECT_METADATA]) { t->collect_md = true; if (dev->type == ARPHRD_IPGRE) dev->type = ARPHRD_NONE; } if (data[IFLA_GRE_IGNORE_DF]) { if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) && (parms->iph.frag_off & htons(IP_DF))) return -EINVAL; t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); } if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); return 0; } static int erspan_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { struct ip_tunnel *t = netdev_priv(dev); int err; err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); if (err) return err; if (!data) return 0; if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); if (t->erspan_ver > 2) return -EINVAL; } if (t->erspan_ver == 1) { if (data[IFLA_GRE_ERSPAN_INDEX]) { t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); if (t->index & ~INDEX_MASK) return -EINVAL; } } else if (t->erspan_ver == 2) { if (data[IFLA_GRE_ERSPAN_DIR]) { t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); if (t->dir & ~(DIR_MASK >> DIR_OFFSET)) return -EINVAL; } if (data[IFLA_GRE_ERSPAN_HWID]) { t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); if (t->hwid & ~(HWID_MASK >> HWID_OFFSET)) return -EINVAL; } } return 0; } /* This function returns true when ENCAP attributes are present in the nl msg */ static bool ipgre_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *ipencap) { bool ret = false; memset(ipencap, 0, sizeof(*ipencap)); if (!data) return ret; if (data[IFLA_GRE_ENCAP_TYPE]) { ret = true; ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); } if (data[IFLA_GRE_ENCAP_FLAGS]) { ret = true; ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); } if (data[IFLA_GRE_ENCAP_SPORT]) { ret = true; ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); } if (data[IFLA_GRE_ENCAP_DPORT]) { ret = true; ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); } return ret; } static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); return ip_tunnel_init(dev); } static const struct net_device_ops gre_tap_netdev_ops = { .ndo_init = gre_tap_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; static int erspan_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); if (tunnel->erspan_ver == 0) tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */ else tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */ tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + erspan_hdr_len(tunnel->erspan_ver); dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); return ip_tunnel_init(dev); } static const struct net_device_ops erspan_netdev_ops = { .ndo_init = erspan_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = erspan_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); dev->max_mtu = 0; dev->netdev_ops = &gre_tap_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, gre_tap_net_id); } static int ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) { struct ip_tunnel_encap ipencap; if (ipgre_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } return 0; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; err = ipgre_newlink_encap_setup(dev, data); if (err) return err; err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; return ip_tunnel_newlink(dev, tb, &p, fwmark); } static int erspan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; err = ipgre_newlink_encap_setup(dev, data); if (err) return err; err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); if (err) return err; return ip_tunnel_newlink(dev, tb, &p, fwmark); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern p; __u32 fwmark = t->fwmark; int err; err = ipgre_newlink_encap_setup(dev, data); if (err) return err; err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; err = ip_tunnel_changelink(dev, tb, &p, fwmark); if (err < 0) return err; ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); ipgre_link_update(dev, !tb[IFLA_MTU]); return 0; } static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern p; __u32 fwmark = t->fwmark; int err; err = ipgre_newlink_encap_setup(dev, data); if (err) return err; err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; err = ip_tunnel_changelink(dev, tb, &p, fwmark); if (err < 0) return err; ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); return 0; } static size_t ipgre_get_size(const struct net_device *dev) { return /* IFLA_GRE_LINK */ nla_total_size(4) + /* IFLA_GRE_IFLAGS */ nla_total_size(2) + /* IFLA_GRE_OFLAGS */ nla_total_size(2) + /* IFLA_GRE_IKEY */ nla_total_size(4) + /* IFLA_GRE_OKEY */ nla_total_size(4) + /* IFLA_GRE_LOCAL */ nla_total_size(4) + /* IFLA_GRE_REMOTE */ nla_total_size(4) + /* IFLA_GRE_TTL */ nla_total_size(1) + /* IFLA_GRE_TOS */ nla_total_size(1) + /* IFLA_GRE_PMTUDISC */ nla_total_size(1) + /* IFLA_GRE_ENCAP_TYPE */ nla_total_size(2) + /* IFLA_GRE_ENCAP_FLAGS */ nla_total_size(2) + /* IFLA_GRE_ENCAP_SPORT */ nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + /* IFLA_GRE_COLLECT_METADATA */ nla_total_size(0) + /* IFLA_GRE_IGNORE_DF */ nla_total_size(1) + /* IFLA_GRE_FWMARK */ nla_total_size(4) + /* IFLA_GRE_ERSPAN_INDEX */ nla_total_size(4) + /* IFLA_GRE_ERSPAN_VER */ nla_total_size(1) + /* IFLA_GRE_ERSPAN_DIR */ nla_total_size(1) + /* IFLA_GRE_ERSPAN_HWID */ nla_total_size(2) + 0; } static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern *p = &t->parms; IP_TUNNEL_DECLARE_FLAGS(o_flags); ip_tunnel_flags_copy(o_flags, p->o_flags); if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || nla_put_u8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))) || nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, t->encap.type) || nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, t->encap.sport) || nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, t->encap.dport) || nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, t->encap.flags)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df)) goto nla_put_failure; if (t->collect_md) { if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); if (t->erspan_ver <= 2) { if (t->erspan_ver != 0 && !t->collect_md) __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) goto nla_put_failure; if (t->erspan_ver == 1) { if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) goto nla_put_failure; } else if (t->erspan_ver == 2) { if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) goto nla_put_failure; } } return ipgre_fill_info(skb, dev); nla_put_failure: return -EMSGSIZE; } static void erspan_setup(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); ether_setup(dev); dev->max_mtu = 0; dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, erspan_net_id); t->erspan_ver = 1; } static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_LINK] = { .type = NLA_U32 }, [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) }, [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_TOS] = { .type = NLA_U8 }, [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .kind = "gre", .maxtype = IFLA_GRE_MAX, .policy = ipgre_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipgre_tunnel_setup, .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .kind = "gretap", .maxtype = IFLA_GRE_MAX, .policy = ipgre_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipgre_tap_setup, .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops erspan_link_ops __read_mostly = { .kind = "erspan", .maxtype = IFLA_GRE_MAX, .policy = ipgre_policy, .priv_size = sizeof(struct ip_tunnel), .setup = erspan_setup, .validate = erspan_validate, .newlink = erspan_newlink, .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = erspan_fill_info, .get_link_net = ip_tunnel_get_link_net, }; struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type) { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; LIST_HEAD(list_kill); struct ip_tunnel *t; int err; memset(&tb, 0, sizeof(tb)); dev = rtnl_create_link(net, name, name_assign_type, &ipgre_tap_ops, tb, NULL); if (IS_ERR(dev)) return dev; /* Configure flow based GRE device. */ t = netdev_priv(dev); t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL, NULL); if (err < 0) { free_netdev(dev); return ERR_PTR(err); } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. */ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); if (err) goto out; err = rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto out; return dev; out: ip_tunnel_dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); static int __net_init ipgre_tap_init_net(struct net *net) { return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net, struct list_head *dev_to_kill) { ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; static int __net_init erspan_init_net(struct net *net) { return ip_tunnel_init_net(net, erspan_net_id, &erspan_link_ops, "erspan0"); } static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops, dev_to_kill); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, .exit_batch_rtnl = erspan_exit_batch_rtnl, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; static int __init ipgre_init(void) { int err; pr_info("GRE over IPv4 tunneling driver\n"); err = register_pernet_device(&ipgre_net_ops); if (err < 0) return err; err = register_pernet_device(&ipgre_tap_net_ops); if (err < 0) goto pnet_tap_failed; err = register_pernet_device(&erspan_net_ops); if (err < 0) goto pnet_erspan_failed; err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; } err = rtnl_link_register(&ipgre_link_ops); if (err < 0) goto rtnl_link_failed; err = rtnl_link_register(&ipgre_tap_ops); if (err < 0) goto tap_ops_failed; err = rtnl_link_register(&erspan_link_ops); if (err < 0) goto erspan_link_failed; return 0; erspan_link_failed: rtnl_link_unregister(&ipgre_tap_ops); tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&erspan_net_ops); pnet_erspan_failed: unregister_pernet_device(&ipgre_tap_net_ops); pnet_tap_failed: unregister_pernet_device(&ipgre_net_ops); return err; } static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); rtnl_link_unregister(&erspan_link_ops); gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); unregister_pernet_device(&erspan_net_ops); } module_init(ipgre_init); module_exit(ipgre_fini); MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_RTNL_LINK("erspan"); MODULE_ALIAS_NETDEV("gre0"); MODULE_ALIAS_NETDEV("gretap0"); MODULE_ALIAS_NETDEV("erspan0");
5 5 15 10 2 21 3 2 2 20 20 5 20 20 8 7 12 1 12 1 12 20 7 21 3 21 38 37 55 55 54 54 55 10 10 79 79 4 4 4 29 29 52 41 41 21 21 7 9 40 4 41 41 8 2 8 8 8 3 8 8 2 18 18 6 7 8 18 9 42 7 21 3 2 9 17 17 17 6 13 12 13 12 12 12 5 5 5 5 5 4 2 2 2 2 2 3 3 3 3 3 3 3 3 2 2 2 2 3 3 3 3 2 2 2 5 5 1 5 4 4 4 5 5 5 3 3 3 5 5 5 1 1 1 12 12 12 12 12 3 3 3 15 15 15 15 15 15 9 15 15 16 16 31 2 2 2 2 1 1 1 1 1 1 2 1 2 2 2 2 2 1 1 1 1 1 9 10 8 3 10 3 3 3 6 6 5 2 3 3 3 4 1 2 2 2 2 2 1 2 2 2 2 2 2 2 4 2 4 2 4 2 2 2 2 13 13 8 7 3 6 5 5 13 4 4 4 2 3 1 2 1 1 1 1 1 1 1 1 1 1 1 2 3 13 12 13 13 13 12 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 4 4 4 3 2 2 1 1 4 4 1 1 1 1 1 2 1 4 1 1 9 8 9 9 9 1 1 3 3 3 3 3 1 1 3 3 3 1 1 13 51 1 1 13 13 13 12 10 10 13 13 13 9 1 43 30 31 31 31 31 31 26 24 45 43 42 42 8 34 38 4 39 39 1 1 38 38 39 1 38 38 38 38 9 1 37 37 37 37 37 13 10 10 9 3 38 42 45 1 1 1 1 1 1 1 1 1 1 5 5 3 5 5 5 4 3 3 5 5 37 9 5 9 4 5 5 9 5 3 3 3 3 3 3 5 12 12 8 8 8 8 2 2 2 2 1 1 1 1 17 15 17 14 15 1 13 13 1 1 1 2 2 1 1 1 1 1 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> Copyright (C) 2010 Google Inc. Copyright (C) 2011 ProFUSION Embedded Systems Copyright (c) 2012 Code Aurora Forum. All rights reserved. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth L2CAP core. */ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/crc16.h> #include <linux/filter.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include "smp.h" #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; bool enable_ecred = IS_ENABLED(CONFIG_BT_LE_L2CAP_ECRED); static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data); static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size); static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); static void l2cap_retrans_timeout(struct work_struct *work); static void l2cap_monitor_timeout(struct work_struct *work); static void l2cap_ack_timeout(struct work_struct *work); static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { if (link_type == LE_LINK) { if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; } return BDADDR_BREDR; } static inline u8 bdaddr_src_type(struct hci_conn *hcon) { return bdaddr_type(hcon->type, hcon->src_type); } static inline u8 bdaddr_dst_type(struct hci_conn *hcon) { return bdaddr_type(hcon->type, hcon->dst_type); } /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->dcid == cid) return c; } return NULL; } static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->scid == cid) return c; } return NULL; } /* Find channel with given SCID. * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ c = l2cap_chan_hold_unless_zero(c); if (c) l2cap_chan_lock(c); } mutex_unlock(&conn->chan_lock); return c; } /* Find channel with given DCID. * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ c = l2cap_chan_hold_unless_zero(c); if (c) l2cap_chan_lock(c); } mutex_unlock(&conn->chan_lock); return c; } static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->ident == ident) return c; } return NULL; } static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src, u8 src_type) { struct l2cap_chan *c; list_for_each_entry(c, &chan_list, global_l) { if (src_type == BDADDR_BREDR && c->src_type != BDADDR_BREDR) continue; if (src_type != BDADDR_BREDR && c->src_type == BDADDR_BREDR) continue; if (c->sport == psm && !bacmp(&c->src, src)) return c; } return NULL; } int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) { int err; write_lock(&chan_list_lock); if (psm && __l2cap_global_chan_by_addr(psm, src, chan->src_type)) { err = -EADDRINUSE; goto done; } if (psm) { chan->psm = psm; chan->sport = psm; err = 0; } else { u16 p, start, end, incr; if (chan->src_type == BDADDR_BREDR) { start = L2CAP_PSM_DYN_START; end = L2CAP_PSM_AUTO_END; incr = 2; } else { start = L2CAP_PSM_LE_DYN_START; end = L2CAP_PSM_LE_DYN_END; incr = 1; } err = -EINVAL; for (p = start; p <= end; p += incr) if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src, chan->src_type)) { chan->psm = cpu_to_le16(p); chan->sport = cpu_to_le16(p); err = 0; break; } } done: write_unlock(&chan_list_lock); return err; } EXPORT_SYMBOL_GPL(l2cap_add_psm); int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) { write_lock(&chan_list_lock); /* Override the defaults (which are for conn-oriented) */ chan->omtu = L2CAP_DEFAULT_MTU; chan->chan_type = L2CAP_CHAN_FIXED; chan->scid = scid; write_unlock(&chan_list_lock); return 0; } static u16 l2cap_alloc_cid(struct l2cap_conn *conn) { u16 cid, dyn_end; if (conn->hcon->type == LE_LINK) dyn_end = L2CAP_CID_LE_DYN_END; else dyn_end = L2CAP_CID_DYN_END; for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } return 0; } static void l2cap_state_change(struct l2cap_chan *chan, int state) { BT_DBG("chan %p %s -> %s", chan, state_to_string(chan->state), state_to_string(state)); chan->state = state; chan->ops->state_change(chan, state, 0); } static inline void l2cap_state_change_and_error(struct l2cap_chan *chan, int state, int err) { chan->state = state; chan->ops->state_change(chan, chan->state, err); } static inline void l2cap_chan_set_err(struct l2cap_chan *chan, int err) { chan->ops->state_change(chan, chan->state, err); } static void __set_retrans_timer(struct l2cap_chan *chan) { if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, msecs_to_jiffies(chan->retrans_timeout)); } } static void __set_monitor_timer(struct l2cap_chan *chan) { __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, msecs_to_jiffies(chan->monitor_timeout)); } } static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head, u16 seq) { struct sk_buff *skb; skb_queue_walk(head, skb) { if (bt_cb(skb)->l2cap.txseq == seq) return skb; } return NULL; } /* ---- L2CAP sequence number lists ---- */ /* For ERTM, ordered lists of sequence numbers must be tracked for * SREJ requests that are received and for frames that are to be * retransmitted. These seq_list functions implement a singly-linked * list in an array, where membership in the list can also be checked * in constant time. Items can also be added to the tail of the list * and removed from the head in constant time, without further memory * allocs or frees. */ static int l2cap_seq_list_init(struct l2cap_seq_list *seq_list, u16 size) { size_t alloc_size, i; /* Allocated size is a power of 2 to map sequence numbers * (which may be up to 14 bits) in to a smaller array that is * sized for the negotiated ERTM transmit windows. */ alloc_size = roundup_pow_of_two(size); seq_list->list = kmalloc_array(alloc_size, sizeof(u16), GFP_KERNEL); if (!seq_list->list) return -ENOMEM; seq_list->mask = alloc_size - 1; seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; for (i = 0; i < alloc_size; i++) seq_list->list[i] = L2CAP_SEQ_LIST_CLEAR; return 0; } static inline void l2cap_seq_list_free(struct l2cap_seq_list *seq_list) { kfree(seq_list->list); } static inline bool l2cap_seq_list_contains(struct l2cap_seq_list *seq_list, u16 seq) { /* Constant-time check for list membership */ return seq_list->list[seq & seq_list->mask] != L2CAP_SEQ_LIST_CLEAR; } static inline u16 l2cap_seq_list_pop(struct l2cap_seq_list *seq_list) { u16 seq = seq_list->head; u16 mask = seq_list->mask; seq_list->head = seq_list->list[seq & mask]; seq_list->list[seq & mask] = L2CAP_SEQ_LIST_CLEAR; if (seq_list->head == L2CAP_SEQ_LIST_TAIL) { seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; } return seq; } static void l2cap_seq_list_clear(struct l2cap_seq_list *seq_list) { u16 i; if (seq_list->head == L2CAP_SEQ_LIST_CLEAR) return; for (i = 0; i <= seq_list->mask; i++) seq_list->list[i] = L2CAP_SEQ_LIST_CLEAR; seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; } static void l2cap_seq_list_append(struct l2cap_seq_list *seq_list, u16 seq) { u16 mask = seq_list->mask; /* All appends happen in constant time */ if (seq_list->list[seq & mask] != L2CAP_SEQ_LIST_CLEAR) return; if (seq_list->tail == L2CAP_SEQ_LIST_CLEAR) seq_list->head = seq; else seq_list->list[seq_list->tail & mask] = seq; seq_list->tail = seq; seq_list->list[seq & mask] = L2CAP_SEQ_LIST_TAIL; } static void l2cap_chan_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, chan_timer.work); struct l2cap_conn *conn = chan->conn; int reason; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); if (!conn) return; mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. */ l2cap_chan_lock(chan); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) reason = ECONNREFUSED; else if (chan->state == BT_CONNECT && chan->sec_level != BT_SECURITY_SDP) reason = ECONNREFUSED; else reason = ETIMEDOUT; l2cap_chan_close(chan, reason); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); } struct l2cap_chan *l2cap_chan_create(void) { struct l2cap_chan *chan; chan = kzalloc(sizeof(*chan), GFP_ATOMIC); if (!chan) return NULL; skb_queue_head_init(&chan->tx_q); skb_queue_head_init(&chan->srej_q); mutex_init(&chan->lock); /* Set default lock nesting level */ atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL); /* Available receive buffer space is initially unknown */ chan->rx_avail = -1; write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); write_unlock(&chan_list_lock); INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); chan->state = BT_OPEN; kref_init(&chan->kref); /* This flag is cleared in l2cap_chan_ready() */ set_bit(CONF_NOT_COMPLETE, &chan->conf_state); BT_DBG("chan %p", chan); return chan; } EXPORT_SYMBOL_GPL(l2cap_chan_create); static void l2cap_chan_destroy(struct kref *kref) { struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref); BT_DBG("chan %p", chan); write_lock(&chan_list_lock); list_del(&chan->global_l); write_unlock(&chan_list_lock); kfree(chan); } void l2cap_chan_hold(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_get(&c->kref); } struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); if (!kref_get_unless_zero(&c->kref)) return NULL; return c; } void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_put(&c->kref, l2cap_chan_destroy); } EXPORT_SYMBOL_GPL(l2cap_chan_put); void l2cap_chan_set_defaults(struct l2cap_chan *chan) { chan->fcs = L2CAP_FCS_CRC16; chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; chan->remote_max_tx = chan->max_tx; chan->remote_tx_win = chan->tx_win; chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; chan->conf_state = 0; set_bit(CONF_NOT_COMPLETE, &chan->conf_state); set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan) { size_t sdu_len = chan->sdu ? chan->sdu->len : 0; if (chan->mps == 0) return 0; /* If we don't know the available space in the receiver buffer, give * enough credits for a full packet. */ if (chan->rx_avail == -1) return (chan->imtu / chan->mps) + 1; /* If we know how much space is available in the receive buffer, give * out as many credits as would fill the buffer. */ if (chan->rx_avail <= sdu_len) return 0; return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps); } static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); chan->rx_credits = l2cap_le_rx_credits(chan); skb_queue_head_init(&chan->tx_q); } static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) { l2cap_le_flowctl_init(chan, tx_credits); /* L2CAP implementations shall support a minimum MPS of 64 octets */ if (chan->mps < L2CAP_ECRED_MIN_MPS) { chan->mps = L2CAP_ECRED_MIN_MPS; chan->rx_credits = l2cap_le_rx_credits(chan); } } void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, __le16_to_cpu(chan->psm), chan->dcid); conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM; chan->conn = conn; switch (chan->chan_type) { case L2CAP_CHAN_CONN_ORIENTED: /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); if (conn->hcon->type == ACL_LINK) chan->omtu = L2CAP_DEFAULT_MTU; break; case L2CAP_CHAN_CONN_LESS: /* Connectionless socket */ chan->scid = L2CAP_CID_CONN_LESS; chan->dcid = L2CAP_CID_CONN_LESS; chan->omtu = L2CAP_DEFAULT_MTU; break; case L2CAP_CHAN_FIXED: /* Caller will set CID and CID specific MTU values */ break; default: /* Raw socket can send/recv signalling messages only */ chan->scid = L2CAP_CID_SIGNALING; chan->dcid = L2CAP_CID_SIGNALING; chan->omtu = L2CAP_DEFAULT_MTU; } chan->local_id = L2CAP_BESTEFFORT_ID; chan->local_stype = L2CAP_SERV_BESTEFFORT; chan->local_msdu = L2CAP_DEFAULT_MAX_SDU_SIZE; chan->local_sdu_itime = L2CAP_DEFAULT_SDU_ITIME; chan->local_acc_lat = L2CAP_DEFAULT_ACC_LAT; chan->local_flush_to = L2CAP_EFS_DEFAULT_FLUSH_TO; l2cap_chan_hold(chan); /* Only keep a reference for fixed channels if they requested it */ if (chan->chan_type != L2CAP_CHAN_FIXED || test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_hold(conn->hcon); list_add(&chan->list, &conn->chan_l); } void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { mutex_lock(&conn->chan_lock); __l2cap_chan_add(conn, chan); mutex_unlock(&conn->chan_lock); } void l2cap_chan_del(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; __clear_chan_timer(chan); BT_DBG("chan %p, conn %p, err %d, state %s", chan, conn, err, state_to_string(chan->state)); chan->ops->teardown(chan, err); if (conn) { /* Delete from channel list */ list_del(&chan->list); l2cap_chan_put(chan); chan->conn = NULL; /* Reference was only held for non-fixed channels or * fixed channels that explicitly requested it using the * FLAG_HOLD_HCI_CONN flag. */ if (chan->chan_type != L2CAP_CHAN_FIXED || test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_drop(conn->hcon); } if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; switch (chan->mode) { case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: skb_queue_purge(&chan->tx_q); break; case L2CAP_MODE_ERTM: __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); skb_queue_purge(&chan->srej_q); l2cap_seq_list_free(&chan->srej_list); l2cap_seq_list_free(&chan->retrans_list); fallthrough; case L2CAP_MODE_STREAMING: skb_queue_purge(&chan->tx_q); break; } } EXPORT_SYMBOL_GPL(l2cap_chan_del); static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id, l2cap_chan_func_t func, void *data) { struct l2cap_chan *chan, *l; list_for_each_entry_safe(chan, l, &conn->chan_l, list) { if (chan->ident == id) func(chan, data); } } static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { struct l2cap_chan *chan; list_for_each_entry(chan, &conn->chan_l, list) { func(chan, data); } } void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { if (!conn) return; mutex_lock(&conn->chan_lock); __l2cap_chan_list(conn, func, data); mutex_unlock(&conn->chan_lock); } EXPORT_SYMBOL_GPL(l2cap_chan_list); static void l2cap_conn_update_id_addr(struct work_struct *work) { struct l2cap_conn *conn = container_of(work, struct l2cap_conn, id_addr_timer.work); struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; mutex_lock(&conn->chan_lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } mutex_unlock(&conn->chan_lock); } static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_conn_rsp rsp; u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) result = L2CAP_CR_LE_AUTHORIZATION; else result = L2CAP_CR_LE_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); rsp.dcid = cpu_to_le16(chan->scid); rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); rsp.result = cpu_to_le16(result); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); } static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) { l2cap_state_change(chan, BT_DISCONN); __l2cap_ecred_conn_rsp_defer(chan); } static void l2cap_chan_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_conn_rsp rsp; u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) result = L2CAP_CR_SEC_BLOCK; else result = L2CAP_CR_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); switch (chan->state) { case BT_LISTEN: chan->ops->teardown(chan, 0); break; case BT_CONNECTED: case BT_CONFIG: if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); l2cap_send_disconn_req(chan, reason); } else l2cap_chan_del(chan, reason); break; case BT_CONNECT2: if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { if (conn->hcon->type == ACL_LINK) l2cap_chan_connect_reject(chan); else if (conn->hcon->type == LE_LINK) { switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: l2cap_chan_le_connect_reject(chan); break; case L2CAP_MODE_EXT_FLOWCTL: l2cap_chan_ecred_connect_reject(chan); return; } } } l2cap_chan_del(chan, reason); break; case BT_CONNECT: case BT_DISCONN: l2cap_chan_del(chan, reason); break; default: chan->ops->teardown(chan, 0); break; } } EXPORT_SYMBOL(l2cap_chan_close); static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) { switch (chan->chan_type) { case L2CAP_CHAN_RAW: switch (chan->sec_level) { case BT_SECURITY_HIGH: case BT_SECURITY_FIPS: return HCI_AT_DEDICATED_BONDING_MITM; case BT_SECURITY_MEDIUM: return HCI_AT_DEDICATED_BONDING; default: return HCI_AT_NO_BONDING; } break; case L2CAP_CHAN_CONN_LESS: if (chan->psm == cpu_to_le16(L2CAP_PSM_3DSP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; } if (chan->sec_level == BT_SECURITY_HIGH || chan->sec_level == BT_SECURITY_FIPS) return HCI_AT_NO_BONDING_MITM; else return HCI_AT_NO_BONDING; break; case L2CAP_CHAN_CONN_ORIENTED: if (chan->psm == cpu_to_le16(L2CAP_PSM_SDP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; if (chan->sec_level == BT_SECURITY_HIGH || chan->sec_level == BT_SECURITY_FIPS) return HCI_AT_NO_BONDING_MITM; else return HCI_AT_NO_BONDING; } fallthrough; default: switch (chan->sec_level) { case BT_SECURITY_HIGH: case BT_SECURITY_FIPS: return HCI_AT_GENERAL_BONDING_MITM; case BT_SECURITY_MEDIUM: return HCI_AT_GENERAL_BONDING; default: return HCI_AT_NO_BONDING; } break; } } /* Service level security */ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) { struct l2cap_conn *conn = chan->conn; __u8 auth_type; if (conn->hcon->type == LE_LINK) return smp_conn_security(conn->hcon, chan->sec_level); auth_type = l2cap_get_auth_type(chan); return hci_conn_security(conn->hcon, chan->sec_level, auth_type, initiator); } static u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ mutex_lock(&conn->ident_lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; mutex_unlock(&conn->ident_lock); return id; } static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); u8 flags; BT_DBG("code 0x%2.2x", code); if (!skb) return; /* Use NO_FLUSH if supported or we have an LE link (which does * not support auto-flushing packets) */ if (lmp_no_flush_capable(conn->hcon->hdev) || conn->hcon->type == LE_LINK) flags = ACL_START_NO_FLUSH; else flags = ACL_START; bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; skb->priority = HCI_PRIO_MAX; hci_send_acl(conn->hchan, skb, flags); } static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) { struct hci_conn *hcon = chan->conn->hcon; u16 flags; BT_DBG("chan %p, skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Use NO_FLUSH for LE links (where this is the only option) or * if the BR/EDR link supports it and flushing has not been * explicitly requested (through FLAG_FLUSHABLE). */ if (hcon->type == LE_LINK || (!test_bit(FLAG_FLUSHABLE, &chan->flags) && lmp_no_flush_capable(hcon->hdev))) flags = ACL_START_NO_FLUSH; else flags = ACL_START; bt_cb(skb)->force_active = test_bit(FLAG_FORCE_ACTIVE, &chan->flags); hci_send_acl(chan->conn->hchan, skb, flags); } static void __unpack_enhanced_control(u16 enh, struct l2cap_ctrl *control) { control->reqseq = (enh & L2CAP_CTRL_REQSEQ) >> L2CAP_CTRL_REQSEQ_SHIFT; control->final = (enh & L2CAP_CTRL_FINAL) >> L2CAP_CTRL_FINAL_SHIFT; if (enh & L2CAP_CTRL_FRAME_TYPE) { /* S-Frame */ control->sframe = 1; control->poll = (enh & L2CAP_CTRL_POLL) >> L2CAP_CTRL_POLL_SHIFT; control->super = (enh & L2CAP_CTRL_SUPERVISE) >> L2CAP_CTRL_SUPER_SHIFT; control->sar = 0; control->txseq = 0; } else { /* I-Frame */ control->sframe = 0; control->sar = (enh & L2CAP_CTRL_SAR) >> L2CAP_CTRL_SAR_SHIFT; control->txseq = (enh & L2CAP_CTRL_TXSEQ) >> L2CAP_CTRL_TXSEQ_SHIFT; control->poll = 0; control->super = 0; } } static void __unpack_extended_control(u32 ext, struct l2cap_ctrl *control) { control->reqseq = (ext & L2CAP_EXT_CTRL_REQSEQ) >> L2CAP_EXT_CTRL_REQSEQ_SHIFT; control->final = (ext & L2CAP_EXT_CTRL_FINAL) >> L2CAP_EXT_CTRL_FINAL_SHIFT; if (ext & L2CAP_EXT_CTRL_FRAME_TYPE) { /* S-Frame */ control->sframe = 1; control->poll = (ext & L2CAP_EXT_CTRL_POLL) >> L2CAP_EXT_CTRL_POLL_SHIFT; control->super = (ext & L2CAP_EXT_CTRL_SUPERVISE) >> L2CAP_EXT_CTRL_SUPER_SHIFT; control->sar = 0; control->txseq = 0; } else { /* I-Frame */ control->sframe = 0; control->sar = (ext & L2CAP_EXT_CTRL_SAR) >> L2CAP_EXT_CTRL_SAR_SHIFT; control->txseq = (ext & L2CAP_EXT_CTRL_TXSEQ) >> L2CAP_EXT_CTRL_TXSEQ_SHIFT; control->poll = 0; control->super = 0; } } static inline void __unpack_control(struct l2cap_chan *chan, struct sk_buff *skb) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { __unpack_extended_control(get_unaligned_le32(skb->data), &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_EXT_CTRL_SIZE); } else { __unpack_enhanced_control(get_unaligned_le16(skb->data), &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_ENH_CTRL_SIZE); } } static u32 __pack_extended_control(struct l2cap_ctrl *control) { u32 packed; packed = control->reqseq << L2CAP_EXT_CTRL_REQSEQ_SHIFT; packed |= control->final << L2CAP_EXT_CTRL_FINAL_SHIFT; if (control->sframe) { packed |= control->poll << L2CAP_EXT_CTRL_POLL_SHIFT; packed |= control->super << L2CAP_EXT_CTRL_SUPER_SHIFT; packed |= L2CAP_EXT_CTRL_FRAME_TYPE; } else { packed |= control->sar << L2CAP_EXT_CTRL_SAR_SHIFT; packed |= control->txseq << L2CAP_EXT_CTRL_TXSEQ_SHIFT; } return packed; } static u16 __pack_enhanced_control(struct l2cap_ctrl *control) { u16 packed; packed = control->reqseq << L2CAP_CTRL_REQSEQ_SHIFT; packed |= control->final << L2CAP_CTRL_FINAL_SHIFT; if (control->sframe) { packed |= control->poll << L2CAP_CTRL_POLL_SHIFT; packed |= control->super << L2CAP_CTRL_SUPER_SHIFT; packed |= L2CAP_CTRL_FRAME_TYPE; } else { packed |= control->sar << L2CAP_CTRL_SAR_SHIFT; packed |= control->txseq << L2CAP_CTRL_TXSEQ_SHIFT; } return packed; } static inline void __pack_control(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { put_unaligned_le32(__pack_extended_control(control), skb->data + L2CAP_HDR_SIZE); } else { put_unaligned_le16(__pack_enhanced_control(control), skb->data + L2CAP_HDR_SIZE); } } static inline unsigned int __ertm_hdr_size(struct l2cap_chan *chan) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) return L2CAP_EXT_HDR_SIZE; else return L2CAP_ENH_HDR_SIZE; } static struct sk_buff *l2cap_create_sframe_pdu(struct l2cap_chan *chan, u32 control) { struct sk_buff *skb; struct l2cap_hdr *lh; int hlen = __ertm_hdr_size(chan); if (chan->fcs == L2CAP_FCS_CRC16) hlen += L2CAP_FCS_SIZE; skb = bt_skb_alloc(hlen, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) put_unaligned_le32(control, skb_put(skb, L2CAP_EXT_CTRL_SIZE)); else put_unaligned_le16(control, skb_put(skb, L2CAP_ENH_CTRL_SIZE)); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *)skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } skb->priority = HCI_PRIO_MAX; return skb; } static void l2cap_send_sframe(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; u32 control_field; BT_DBG("chan %p, control %p", chan, control); if (!control->sframe) return; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state) && !control->poll) control->final = 1; if (control->super == L2CAP_SUPER_RR) clear_bit(CONN_RNR_SENT, &chan->conn_state); else if (control->super == L2CAP_SUPER_RNR) set_bit(CONN_RNR_SENT, &chan->conn_state); if (control->super != L2CAP_SUPER_SREJ) { chan->last_acked_seq = control->reqseq; __clear_ack_timer(chan); } BT_DBG("reqseq %d, final %d, poll %d, super %d", control->reqseq, control->final, control->poll, control->super); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) control_field = __pack_extended_control(control); else control_field = __pack_enhanced_control(control); skb = l2cap_create_sframe_pdu(chan, control_field); if (!IS_ERR(skb)) l2cap_do_send(chan, skb); } static void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, bool poll) { struct l2cap_ctrl control; BT_DBG("chan %p, poll %d", chan, poll); memset(&control, 0, sizeof(control)); control.sframe = 1; control.poll = poll; if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) control.super = L2CAP_SUPER_RNR; else control.super = L2CAP_SUPER_RR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); } static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) return true; return !test_bit(CONF_CONNECT_PEND, &chan->conf_state); } void l2cap_send_conn_req(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_conn_req req; req.scid = cpu_to_le16(chan->scid); req.psm = chan->psm; chan->ident = l2cap_get_ident(conn); set_bit(CONF_CONNECT_PEND, &chan->conf_state); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ, sizeof(req), &req); } static void l2cap_chan_ready(struct l2cap_chan *chan) { /* The channel may have already been flagged as connected in * case of receiving data before the L2CAP info req/rsp * procedure is complete. */ if (chan->state == BT_CONNECTED) return; /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: if (!chan->tx_credits) chan->ops->suspend(chan); break; } chan->state = BT_CONNECTED; chan->ops->ready(chan); } static void l2cap_le_connect(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_conn_req req; if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) return; if (!chan->imtu) chan->imtu = chan->conn->mtu; l2cap_le_flowctl_init(chan, 0); memset(&req, 0, sizeof(req)); req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); req.mtu = cpu_to_le16(chan->imtu); req.mps = cpu_to_le16(chan->mps); req.credits = cpu_to_le16(chan->rx_credits); chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_REQ, sizeof(req), &req); } struct l2cap_ecred_conn_data { struct { struct l2cap_ecred_conn_req_hdr req; __le16 scid[5]; } __packed pdu; struct l2cap_chan *chan; struct pid *pid; int count; }; static void l2cap_ecred_defer_connect(struct l2cap_chan *chan, void *data) { struct l2cap_ecred_conn_data *conn = data; struct pid *pid; if (chan == conn->chan) return; if (!test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; pid = chan->ops->get_peer_pid(chan); /* Only add deferred channels with the same PID/PSM */ if (conn->pid != pid || chan->psm != conn->chan->psm || chan->ident || chan->mode != L2CAP_MODE_EXT_FLOWCTL || chan->state != BT_CONNECT) return; if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; l2cap_ecred_init(chan, 0); /* Set the same ident so we can match on the rsp */ chan->ident = conn->chan->ident; /* Include all channels deferred */ conn->pdu.scid[conn->count] = cpu_to_le16(chan->scid); conn->count++; } static void l2cap_ecred_connect(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_ecred_conn_data data; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) return; if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; l2cap_ecred_init(chan, 0); memset(&data, 0, sizeof(data)); data.pdu.req.psm = chan->psm; data.pdu.req.mtu = cpu_to_le16(chan->imtu); data.pdu.req.mps = cpu_to_le16(chan->mps); data.pdu.req.credits = cpu_to_le16(chan->rx_credits); data.pdu.scid[0] = cpu_to_le16(chan->scid); chan->ident = l2cap_get_ident(conn); data.count = 1; data.chan = chan; data.pid = chan->ops->get_peer_pid(chan); __l2cap_chan_list(conn, l2cap_ecred_defer_connect, &data); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_REQ, sizeof(data.pdu.req) + data.count * sizeof(__le16), &data.pdu); } static void l2cap_le_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; if (!smp_conn_security(conn->hcon, chan->sec_level)) return; if (!chan->psm) { l2cap_chan_ready(chan); return; } if (chan->state == BT_CONNECT) { if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) l2cap_ecred_connect(chan); else l2cap_le_connect(chan); } } static void l2cap_start_connection(struct l2cap_chan *chan) { if (chan->conn->hcon->type == LE_LINK) { l2cap_le_start(chan); } else { l2cap_send_conn_req(chan); } } static void l2cap_request_info(struct l2cap_conn *conn) { struct l2cap_info_req req; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) return; req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(req), &req); } static bool l2cap_check_enc_key_size(struct hci_conn *hcon) { /* The minimum encryption key size needs to be enforced by the * host stack before establishing any L2CAP connections. The * specification in theory allows a minimum of 1, but to align * BR/EDR and LE transports, a minimum of 7 is chosen. * * This check might also be called for unencrypted connections * that have no key size requirements. Ensure that the link is * actually encrypted before enforcing a key size. */ int min_key_size = hcon->hdev->min_enc_key_size; /* On FIPS security level, key size must be 16 bytes */ if (hcon->sec_level == BT_SECURITY_FIPS) min_key_size = 16; return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || hcon->enc_key_size >= min_key_size); } static void l2cap_do_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; if (conn->hcon->type == LE_LINK) { l2cap_le_start(chan); return; } if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { l2cap_request_info(conn); return; } if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; if (!l2cap_chan_check_security(chan, true) || !__l2cap_no_conn_pending(chan)) return; if (l2cap_check_enc_key_size(conn->hcon)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) { u32 local_feat_mask = l2cap_feat_mask; if (!disable_ertm) local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; switch (mode) { case L2CAP_MODE_ERTM: return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; case L2CAP_MODE_STREAMING: return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; default: return 0x00; } } static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; struct l2cap_disconn_req req; if (!conn) return; if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); } req.dcid = cpu_to_le16(chan->dcid); req.scid = cpu_to_le16(chan->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); l2cap_state_change_and_error(chan, BT_DISCONN, err); } /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { struct l2cap_chan *chan, *tmp; BT_DBG("conn %p", conn); mutex_lock(&conn->chan_lock); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { l2cap_chan_lock(chan); if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { l2cap_chan_ready(chan); l2cap_chan_unlock(chan); continue; } if (chan->state == BT_CONNECT) { if (!l2cap_chan_check_security(chan, true) || !__l2cap_no_conn_pending(chan)) { l2cap_chan_unlock(chan); continue; } if (!l2cap_mode_supported(chan->mode, conn->feat_mask) && test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) { l2cap_chan_close(chan, ECONNRESET); l2cap_chan_unlock(chan); continue; } if (l2cap_check_enc_key_size(conn->hcon)) l2cap_start_connection(chan); else l2cap_chan_close(chan, ECONNREFUSED); } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; char buf[128]; rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); chan->ops->defer(chan); } else { l2cap_state_change(chan, BT_CONFIG); rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); } } else { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); } l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); if (test_bit(CONF_REQ_SENT, &chan->conf_state) || rsp.result != L2CAP_CR_SUCCESS) { l2cap_chan_unlock(chan); continue; } set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } l2cap_chan_unlock(chan); } mutex_unlock(&conn->chan_lock); } static void l2cap_le_conn_ready(struct l2cap_conn *conn) { struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; BT_DBG("%s conn %p", hdev->name, conn); /* For outgoing pairing which doesn't necessarily have an * associated socket (e.g. mgmt_pair_device). */ if (hcon->out) smp_conn_security(hcon, hcon->pending_sec_level); /* For LE peripheral connections, make sure the connection interval * is in the range of the minimum and maximum interval that has * been configured for this connection. If not, then trigger * the connection update procedure. */ if (hcon->role == HCI_ROLE_SLAVE && (hcon->le_conn_interval < hcon->le_conn_min_interval || hcon->le_conn_interval > hcon->le_conn_max_interval)) { struct l2cap_conn_param_update_req req; req.min = cpu_to_le16(hcon->le_conn_min_interval); req.max = cpu_to_le16(hcon->le_conn_max_interval); req.latency = cpu_to_le16(hcon->le_conn_latency); req.to_multiplier = cpu_to_le16(hcon->le_supv_timeout); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONN_PARAM_UPDATE_REQ, sizeof(req), &req); } } static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan *chan; struct hci_conn *hcon = conn->hcon; BT_DBG("conn %p", conn); if (hcon->type == ACL_LINK) l2cap_request_info(conn); mutex_lock(&conn->chan_lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); if (hcon->type == LE_LINK) { l2cap_le_start(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) l2cap_chan_ready(chan); } else if (chan->state == BT_CONNECT) { l2cap_do_start(chan); } l2cap_chan_unlock(chan); } mutex_unlock(&conn->chan_lock); if (hcon->type == LE_LINK) l2cap_le_conn_ready(conn); queue_work(hcon->hdev->workqueue, &conn->pending_rx_work); } /* Notify sockets that we cannot guaranty reliability anymore */ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) { struct l2cap_chan *chan; BT_DBG("conn %p", conn); mutex_lock(&conn->chan_lock); list_for_each_entry(chan, &conn->chan_l, list) { if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) l2cap_chan_set_err(chan, err); } mutex_unlock(&conn->chan_lock); } static void l2cap_info_timeout(struct work_struct *work) { struct l2cap_conn *conn = container_of(work, struct l2cap_conn, info_timer.work); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); } /* * l2cap_user * External modules can register l2cap_user objects on l2cap_conn. The ->probe * callback is called during registration. The ->remove callback is called * during unregistration. * An l2cap_user object can either be explicitly unregistered or when the * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon, * l2cap->hchan, .. are valid as long as the remove callback hasn't been called. * External modules must own a reference to the l2cap_conn object if they intend * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at * any time if they don't. */ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) { struct hci_dev *hdev = conn->hcon->hdev; int ret; /* We need to check whether l2cap_conn is registered. If it is not, we * must not register the l2cap_user. l2cap_conn_del() is unregisters * l2cap_conn objects, but doesn't provide its own locking. Instead, it * relies on the parent hci_conn object to be locked. This itself relies * on the hci_dev object to be locked. So we must lock the hci device * here, too. */ hci_dev_lock(hdev); if (!list_empty(&user->list)) { ret = -EINVAL; goto out_unlock; } /* conn->hchan is NULL after l2cap_conn_del() was called */ if (!conn->hchan) { ret = -ENODEV; goto out_unlock; } ret = user->probe(conn, user); if (ret) goto out_unlock; list_add(&user->list, &conn->users); ret = 0; out_unlock: hci_dev_unlock(hdev); return ret; } EXPORT_SYMBOL(l2cap_register_user); void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) { struct hci_dev *hdev = conn->hcon->hdev; hci_dev_lock(hdev); if (list_empty(&user->list)) goto out_unlock; list_del_init(&user->list); user->remove(conn, user); out_unlock: hci_dev_unlock(hdev); } EXPORT_SYMBOL(l2cap_unregister_user); static void l2cap_unregister_all_users(struct l2cap_conn *conn) { struct l2cap_user *user; while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); list_del_init(&user->list); user->remove(conn, user); } } static void l2cap_conn_del(struct hci_conn *hcon, int err) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan, *l; if (!conn) return; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); /* We can not call flush_work(&conn->pending_rx_work) here since we * might block if we are running on a worker from the same workqueue * pending_rx_work is waiting on. */ if (work_pending(&conn->pending_rx_work)) cancel_work_sync(&conn->pending_rx_work); cancel_delayed_work_sync(&conn->id_addr_timer); l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ hcon->disc_timeout = 0; mutex_lock(&conn->chan_lock); /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { l2cap_chan_hold(chan); l2cap_chan_lock(chan); l2cap_chan_del(chan, err); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } mutex_unlock(&conn->chan_lock); hci_chan_del(conn->hchan); if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); hcon->l2cap_data = NULL; conn->hchan = NULL; l2cap_conn_put(conn); } static void l2cap_conn_free(struct kref *ref) { struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); hci_conn_put(conn->hcon); kfree(conn); } struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn) { kref_get(&conn->ref); return conn; } EXPORT_SYMBOL(l2cap_conn_get); void l2cap_conn_put(struct l2cap_conn *conn) { kref_put(&conn->ref, l2cap_conn_free); } EXPORT_SYMBOL(l2cap_conn_put); /* ---- Socket interface ---- */ /* Find socket with psm and source / destination bdaddr. * Returns closest match. */ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *src, bdaddr_t *dst, u8 link_type) { struct l2cap_chan *c, *tmp, *c1 = NULL; read_lock(&chan_list_lock); list_for_each_entry_safe(c, tmp, &chan_list, global_l) { if (state && c->state != state) continue; if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) continue; if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) continue; if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { int src_match, dst_match; int src_any, dst_any; /* Exact match. */ src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { if (!l2cap_chan_hold_unless_zero(c)) continue; read_unlock(&chan_list_lock); return c; } /* Closest match */ src_any = !bacmp(&c->src, BDADDR_ANY); dst_any = !bacmp(&c->dst, BDADDR_ANY); if ((src_match && dst_any) || (src_any && dst_match) || (src_any && dst_any)) c1 = c; } } if (c1) c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); return c1; } static void l2cap_monitor_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, monitor_timer.work); BT_DBG("chan %p", chan); l2cap_chan_lock(chan); if (!chan->conn) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } l2cap_tx(chan, NULL, NULL, L2CAP_EV_MONITOR_TO); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } static void l2cap_retrans_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, retrans_timer.work); BT_DBG("chan %p", chan); l2cap_chan_lock(chan); if (!chan->conn) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } l2cap_tx(chan, NULL, NULL, L2CAP_EV_RETRANS_TO); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } static void l2cap_streaming_send(struct l2cap_chan *chan, struct sk_buff_head *skbs) { struct sk_buff *skb; struct l2cap_ctrl *control; BT_DBG("chan %p, skbs %p", chan, skbs); skb_queue_splice_tail_init(skbs, &chan->tx_q); while (!skb_queue_empty(&chan->tx_q)) { skb = skb_dequeue(&chan->tx_q); bt_cb(skb)->l2cap.retries = 1; control = &bt_cb(skb)->l2cap; control->reqseq = 0; control->txseq = chan->next_tx_seq; __pack_control(chan, control, skb); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } l2cap_do_send(chan, skb); BT_DBG("Sent txseq %u", control->txseq); chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); chan->frames_sent++; } } static int l2cap_ertm_send(struct l2cap_chan *chan) { struct sk_buff *skb, *tx_skb; struct l2cap_ctrl *control; int sent = 0; BT_DBG("chan %p", chan); if (chan->state != BT_CONNECTED) return -ENOTCONN; if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return 0; while (chan->tx_send_head && chan->unacked_frames < chan->remote_tx_win && chan->tx_state == L2CAP_TX_STATE_XMIT) { skb = chan->tx_send_head; bt_cb(skb)->l2cap.retries = 1; control = &bt_cb(skb)->l2cap; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) control->final = 1; control->reqseq = chan->buffer_seq; chan->last_acked_seq = chan->buffer_seq; control->txseq = chan->next_tx_seq; __pack_control(chan, control, skb); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } /* Clone after data has been modified. Data is assumed to be read-only (for locking purposes) on cloned sk_buffs. */ tx_skb = skb_clone(skb, GFP_KERNEL); if (!tx_skb) break; __set_retrans_timer(chan); chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); chan->unacked_frames++; chan->frames_sent++; sent++; if (skb_queue_is_last(&chan->tx_q, skb)) chan->tx_send_head = NULL; else chan->tx_send_head = skb_queue_next(&chan->tx_q, skb); l2cap_do_send(chan, tx_skb); BT_DBG("Sent txseq %u", control->txseq); } BT_DBG("Sent %d, %u unacked, %u in ERTM queue", sent, chan->unacked_frames, skb_queue_len(&chan->tx_q)); return sent; } static void l2cap_ertm_resend(struct l2cap_chan *chan) { struct l2cap_ctrl control; struct sk_buff *skb; struct sk_buff *tx_skb; u16 seq; BT_DBG("chan %p", chan); if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return; while (chan->retrans_list.head != L2CAP_SEQ_LIST_CLEAR) { seq = l2cap_seq_list_pop(&chan->retrans_list); skb = l2cap_ertm_seq_in_queue(&chan->tx_q, seq); if (!skb) { BT_DBG("Error: Can't retransmit seq %d, frame missing", seq); continue; } bt_cb(skb)->l2cap.retries++; control = bt_cb(skb)->l2cap; if (chan->max_tx != 0 && bt_cb(skb)->l2cap.retries > chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); l2cap_seq_list_clear(&chan->retrans_list); break; } control.reqseq = chan->buffer_seq; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) control.final = 1; else control.final = 0; if (skb_cloned(skb)) { /* Cloned sk_buffs are read-only, so we need a * writeable copy */ tx_skb = skb_copy(skb, GFP_KERNEL); } else { tx_skb = skb_clone(skb, GFP_KERNEL); } if (!tx_skb) { l2cap_seq_list_clear(&chan->retrans_list); break; } /* Update skb contents */ if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { put_unaligned_le32(__pack_extended_control(&control), tx_skb->data + L2CAP_HDR_SIZE); } else { put_unaligned_le16(__pack_enhanced_control(&control), tx_skb->data + L2CAP_HDR_SIZE); } /* Update FCS */ if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) tx_skb->data, tx_skb->len - L2CAP_FCS_SIZE); put_unaligned_le16(fcs, skb_tail_pointer(tx_skb) - L2CAP_FCS_SIZE); } l2cap_do_send(chan, tx_skb); BT_DBG("Resent txseq %d", control.txseq); chan->last_acked_seq = chan->buffer_seq; } } static void l2cap_retransmit(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_seq_list_append(&chan->retrans_list, control->reqseq); l2cap_ertm_resend(chan); } static void l2cap_retransmit_all(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; BT_DBG("chan %p, control %p", chan, control); if (control->poll) set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_seq_list_clear(&chan->retrans_list); if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return; if (chan->unacked_frames) { skb_queue_walk(&chan->tx_q, skb) { if (bt_cb(skb)->l2cap.txseq == control->reqseq || skb == chan->tx_send_head) break; } skb_queue_walk_from(&chan->tx_q, skb) { if (skb == chan->tx_send_head) break; l2cap_seq_list_append(&chan->retrans_list, bt_cb(skb)->l2cap.txseq); } l2cap_ertm_resend(chan); } } static void l2cap_send_ack(struct l2cap_chan *chan) { struct l2cap_ctrl control; u16 frames_to_ack = __seq_offset(chan, chan->buffer_seq, chan->last_acked_seq); int threshold; BT_DBG("chan %p last_acked_seq %d buffer_seq %d", chan, chan->last_acked_seq, chan->buffer_seq); memset(&control, 0, sizeof(control)); control.sframe = 1; if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state) && chan->rx_state == L2CAP_RX_STATE_RECV) { __clear_ack_timer(chan); control.super = L2CAP_SUPER_RNR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); } else { if (!test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) { l2cap_ertm_send(chan); /* If any i-frames were sent, they included an ack */ if (chan->buffer_seq == chan->last_acked_seq) frames_to_ack = 0; } /* Ack now if the window is 3/4ths full. * Calculate without mul or div */ threshold = chan->ack_win; threshold += threshold << 1; threshold >>= 2; BT_DBG("frames_to_ack %u, threshold %d", frames_to_ack, threshold); if (frames_to_ack >= threshold) { __clear_ack_timer(chan); control.super = L2CAP_SUPER_RR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); frames_to_ack = 0; } if (frames_to_ack) __set_ack_timer(chan); } } static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, struct msghdr *msg, int len, int count, struct sk_buff *skb) { struct l2cap_conn *conn = chan->conn; struct sk_buff **frag; int sent = 0; if (!copy_from_iter_full(skb_put(skb, count), count, &msg->msg_iter)) return -EFAULT; sent += count; len -= count; /* Continuation fragments (no L2CAP header) */ frag = &skb_shinfo(skb)->frag_list; while (len) { struct sk_buff *tmp; count = min_t(unsigned int, conn->mtu, len); tmp = chan->ops->alloc_skb(chan, 0, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(tmp)) return PTR_ERR(tmp); *frag = tmp; if (!copy_from_iter_full(skb_put(*frag, count), count, &msg->msg_iter)) return -EFAULT; sent += count; len -= count; skb->len += (*frag)->len; skb->data_len += (*frag)->len; frag = &(*frag)->next; } return sent; } static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen = L2CAP_HDR_SIZE + L2CAP_PSMLEN_SIZE; struct l2cap_hdr *lh; BT_DBG("chan %p psm 0x%2.2x len %zu", chan, __le16_to_cpu(chan->psm), len); count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + L2CAP_PSMLEN_SIZE); put_unaligned(chan->psm, (__le16 *) skb_put(skb, L2CAP_PSMLEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); skb = chan->ops->alloc_skb(chan, L2CAP_HDR_SIZE, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 sdulen) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); if (!conn) return ERR_PTR(-ENOTCONN); hlen = __ertm_hdr_size(chan); if (sdulen) hlen += L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) hlen += L2CAP_FCS_SIZE; count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); /* Control header is populated later */ if (test_bit(FLAG_EXT_CTRL, &chan->flags)) put_unaligned_le32(0, skb_put(skb, L2CAP_EXT_CTRL_SIZE)); else put_unaligned_le16(0, skb_put(skb, L2CAP_ENH_CTRL_SIZE)); if (sdulen) put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } bt_cb(skb)->l2cap.fcs = chan->fcs; bt_cb(skb)->l2cap.retries = 0; return skb; } static int l2cap_segment_sdu(struct l2cap_chan *chan, struct sk_buff_head *seg_queue, struct msghdr *msg, size_t len) { struct sk_buff *skb; u16 sdu_len; size_t pdu_len; u8 sar; BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); /* It is critical that ERTM PDUs fit in a single HCI fragment, * so fragmented skbs are not used. The HCI layer's handling * of fragmented skbs is not compatible with ERTM's queueing. */ /* PDU size is derived from the HCI MTU */ pdu_len = chan->conn->mtu; /* Constrain PDU size for BR/EDR connections */ pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD); /* Adjust for largest possible L2CAP overhead. */ if (chan->fcs) pdu_len -= L2CAP_FCS_SIZE; pdu_len -= __ertm_hdr_size(chan); /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; pdu_len = len; } else { sar = L2CAP_SAR_START; sdu_len = len; } while (len > 0) { skb = l2cap_create_iframe_pdu(chan, msg, pdu_len, sdu_len); if (IS_ERR(skb)) { __skb_queue_purge(seg_queue); return PTR_ERR(skb); } bt_cb(skb)->l2cap.sar = sar; __skb_queue_tail(seg_queue, skb); len -= pdu_len; if (sdu_len) sdu_len = 0; if (len <= pdu_len) { sar = L2CAP_SAR_END; pdu_len = len; } else { sar = L2CAP_SAR_CONTINUE; } } return 0; } static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 sdulen) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); if (!conn) return ERR_PTR(-ENOTCONN); hlen = L2CAP_HDR_SIZE; if (sdulen) hlen += L2CAP_SDULEN_SIZE; count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); if (sdulen) put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static int l2cap_segment_le_sdu(struct l2cap_chan *chan, struct sk_buff_head *seg_queue, struct msghdr *msg, size_t len) { struct sk_buff *skb; size_t pdu_len; u16 sdu_len; BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); sdu_len = len; pdu_len = chan->remote_mps - L2CAP_SDULEN_SIZE; while (len > 0) { if (len <= pdu_len) pdu_len = len; skb = l2cap_create_le_flowctl_pdu(chan, msg, pdu_len, sdu_len); if (IS_ERR(skb)) { __skb_queue_purge(seg_queue); return PTR_ERR(skb); } __skb_queue_tail(seg_queue, skb); len -= pdu_len; if (sdu_len) { sdu_len = 0; pdu_len += L2CAP_SDULEN_SIZE; } } return 0; } static void l2cap_le_flowctl_send(struct l2cap_chan *chan) { int sent = 0; BT_DBG("chan %p", chan); while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); chan->tx_credits--; sent++; } BT_DBG("Sent %d credits %u queued %u", sent, chan->tx_credits, skb_queue_len(&chan->tx_q)); } int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct sk_buff *skb; int err; struct sk_buff_head seg_queue; if (!chan->conn) return -ENOTCONN; /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); l2cap_do_send(chan, skb); return len; } switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; __skb_queue_head_init(&seg_queue); err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len); if (chan->state != BT_CONNECTED) { __skb_queue_purge(&seg_queue); err = -ENOTCONN; } if (err) return err; skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); l2cap_le_flowctl_send(chan); if (!chan->tx_credits) chan->ops->suspend(chan); err = len; break; case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; /* Create a basic PDU */ skb = l2cap_create_basic_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); l2cap_do_send(chan, skb); err = len; break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: /* Check outgoing MTU */ if (len > chan->omtu) { err = -EMSGSIZE; break; } __skb_queue_head_init(&seg_queue); /* Do segmentation before calling in to the state machine, * since it's possible to block while waiting for memory * allocation. */ err = l2cap_segment_sdu(chan, &seg_queue, msg, len); if (err) break; if (chan->mode == L2CAP_MODE_ERTM) l2cap_tx(chan, NULL, &seg_queue, L2CAP_EV_DATA_REQUEST); else l2cap_streaming_send(chan, &seg_queue); err = len; /* If the skbs were not queued for sending, they'll still be in * seg_queue and need to be purged. */ __skb_queue_purge(&seg_queue); break; default: BT_DBG("bad state %1.1x", chan->mode); err = -EBADFD; } return err; } EXPORT_SYMBOL_GPL(l2cap_chan_send); static void l2cap_send_srej(struct l2cap_chan *chan, u16 txseq) { struct l2cap_ctrl control; u16 seq; BT_DBG("chan %p, txseq %u", chan, txseq); memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; for (seq = chan->expected_tx_seq; seq != txseq; seq = __next_seq(chan, seq)) { if (!l2cap_ertm_seq_in_queue(&chan->srej_q, seq)) { control.reqseq = seq; l2cap_send_sframe(chan, &control); l2cap_seq_list_append(&chan->srej_list, seq); } } chan->expected_tx_seq = __next_seq(chan, txseq); } static void l2cap_send_srej_tail(struct l2cap_chan *chan) { struct l2cap_ctrl control; BT_DBG("chan %p", chan); if (chan->srej_list.tail == L2CAP_SEQ_LIST_CLEAR) return; memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; control.reqseq = chan->srej_list.tail; l2cap_send_sframe(chan, &control); } static void l2cap_send_srej_list(struct l2cap_chan *chan, u16 txseq) { struct l2cap_ctrl control; u16 initial_head; u16 seq; BT_DBG("chan %p, txseq %u", chan, txseq); memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; /* Capture initial list head to allow only one pass through the list. */ initial_head = chan->srej_list.head; do { seq = l2cap_seq_list_pop(&chan->srej_list); if (seq == txseq || seq == L2CAP_SEQ_LIST_CLEAR) break; control.reqseq = seq; l2cap_send_sframe(chan, &control); l2cap_seq_list_append(&chan->srej_list, seq); } while (chan->srej_list.head != initial_head); } static void l2cap_process_reqseq(struct l2cap_chan *chan, u16 reqseq) { struct sk_buff *acked_skb; u16 ackseq; BT_DBG("chan %p, reqseq %u", chan, reqseq); if (chan->unacked_frames == 0 || reqseq == chan->expected_ack_seq) return; BT_DBG("expected_ack_seq %u, unacked_frames %u", chan->expected_ack_seq, chan->unacked_frames); for (ackseq = chan->expected_ack_seq; ackseq != reqseq; ackseq = __next_seq(chan, ackseq)) { acked_skb = l2cap_ertm_seq_in_queue(&chan->tx_q, ackseq); if (acked_skb) { skb_unlink(acked_skb, &chan->tx_q); kfree_skb(acked_skb); chan->unacked_frames--; } } chan->expected_ack_seq = reqseq; if (chan->unacked_frames == 0) __clear_retrans_timer(chan); BT_DBG("unacked_frames %u", chan->unacked_frames); } static void l2cap_abort_rx_srej_sent(struct l2cap_chan *chan) { BT_DBG("chan %p", chan); chan->expected_tx_seq = chan->buffer_seq; l2cap_seq_list_clear(&chan->srej_list); skb_queue_purge(&chan->srej_q); chan->rx_state = L2CAP_RX_STATE_RECV; } static void l2cap_tx_state_xmit(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d", chan, control, skbs, event); switch (event) { case L2CAP_EV_DATA_REQUEST: if (chan->tx_send_head == NULL) chan->tx_send_head = skb_peek(skbs); skb_queue_splice_tail_init(skbs, &chan->tx_q); l2cap_ertm_send(chan); break; case L2CAP_EV_LOCAL_BUSY_DETECTED: BT_DBG("Enter LOCAL_BUSY"); set_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (chan->rx_state == L2CAP_RX_STATE_SREJ_SENT) { /* The SREJ_SENT state must be aborted if we are to * enter the LOCAL_BUSY state. */ l2cap_abort_rx_srej_sent(chan); } l2cap_send_ack(chan); break; case L2CAP_EV_LOCAL_BUSY_CLEAR: BT_DBG("Exit LOCAL_BUSY"); clear_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (test_bit(CONN_RNR_SENT, &chan->conn_state)) { struct l2cap_ctrl local_control; memset(&local_control, 0, sizeof(local_control)); local_control.sframe = 1; local_control.super = L2CAP_SUPER_RR; local_control.poll = 1; local_control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &local_control); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; } break; case L2CAP_EV_RECV_REQSEQ_AND_FBIT: l2cap_process_reqseq(chan, control->reqseq); break; case L2CAP_EV_EXPLICIT_POLL: l2cap_send_rr_or_rnr(chan, 1); chan->retry_count = 1; __set_monitor_timer(chan); __clear_ack_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; break; case L2CAP_EV_RETRANS_TO: l2cap_send_rr_or_rnr(chan, 1); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; break; case L2CAP_EV_RECV_FBIT: /* Nothing to process */ break; default: break; } } static void l2cap_tx_state_wait_f(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d", chan, control, skbs, event); switch (event) { case L2CAP_EV_DATA_REQUEST: if (chan->tx_send_head == NULL) chan->tx_send_head = skb_peek(skbs); /* Queue data, but don't send. */ skb_queue_splice_tail_init(skbs, &chan->tx_q); break; case L2CAP_EV_LOCAL_BUSY_DETECTED: BT_DBG("Enter LOCAL_BUSY"); set_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (chan->rx_state == L2CAP_RX_STATE_SREJ_SENT) { /* The SREJ_SENT state must be aborted if we are to * enter the LOCAL_BUSY state. */ l2cap_abort_rx_srej_sent(chan); } l2cap_send_ack(chan); break; case L2CAP_EV_LOCAL_BUSY_CLEAR: BT_DBG("Exit LOCAL_BUSY"); clear_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (test_bit(CONN_RNR_SENT, &chan->conn_state)) { struct l2cap_ctrl local_control; memset(&local_control, 0, sizeof(local_control)); local_control.sframe = 1; local_control.super = L2CAP_SUPER_RR; local_control.poll = 1; local_control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &local_control); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; } break; case L2CAP_EV_RECV_REQSEQ_AND_FBIT: l2cap_process_reqseq(chan, control->reqseq); fallthrough; case L2CAP_EV_RECV_FBIT: if (control && control->final) { __clear_monitor_timer(chan); if (chan->unacked_frames > 0) __set_retrans_timer(chan); chan->retry_count = 0; chan->tx_state = L2CAP_TX_STATE_XMIT; BT_DBG("recv fbit tx_state 0x2.2%x", chan->tx_state); } break; case L2CAP_EV_EXPLICIT_POLL: /* Ignore */ break; case L2CAP_EV_MONITOR_TO: if (chan->max_tx == 0 || chan->retry_count < chan->max_tx) { l2cap_send_rr_or_rnr(chan, 1); __set_monitor_timer(chan); chan->retry_count++; } else { l2cap_send_disconn_req(chan, ECONNABORTED); } break; default: break; } } static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d, state %d", chan, control, skbs, event, chan->tx_state); switch (chan->tx_state) { case L2CAP_TX_STATE_XMIT: l2cap_tx_state_xmit(chan, control, skbs, event); break; case L2CAP_TX_STATE_WAIT_F: l2cap_tx_state_wait_f(chan, control, skbs, event); break; default: /* Ignore event */ break; } } static void l2cap_pass_to_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_tx(chan, control, NULL, L2CAP_EV_RECV_REQSEQ_AND_FBIT); } static void l2cap_pass_to_tx_fbit(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_tx(chan, control, NULL, L2CAP_EV_RECV_FBIT); } /* Copy frame to all raw sockets on that connection */ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) { struct sk_buff *nskb; struct l2cap_chan *chan; BT_DBG("conn %p", conn); mutex_lock(&conn->chan_lock); list_for_each_entry(chan, &conn->chan_l, list) { if (chan->chan_type != L2CAP_CHAN_RAW) continue; /* Don't send frame to the channel it came from */ if (bt_cb(skb)->l2cap.chan == chan) continue; nskb = skb_clone(skb, GFP_KERNEL); if (!nskb) continue; if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } mutex_unlock(&conn->chan_lock); } /* ---- L2CAP signalling commands ---- */ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data) { struct sk_buff *skb, **frag; struct l2cap_cmd_hdr *cmd; struct l2cap_hdr *lh; int len, count; BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %u", conn, code, ident, dlen); if (conn->mtu < L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE) return NULL; len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen; count = min_t(unsigned int, conn->mtu, len); skb = bt_skb_alloc(count, GFP_KERNEL); if (!skb) return NULL; lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); if (conn->hcon->type == LE_LINK) lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING); else lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); cmd = skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; cmd->ident = ident; cmd->len = cpu_to_le16(dlen); if (dlen) { count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; skb_put_data(skb, data, count); data += count; } len -= skb->len; /* Continuation fragments (no L2CAP header) */ frag = &skb_shinfo(skb)->frag_list; while (len) { count = min_t(unsigned int, conn->mtu, len); *frag = bt_skb_alloc(count, GFP_KERNEL); if (!*frag) goto fail; skb_put_data(*frag, data, count); len -= count; data += count; frag = &(*frag)->next; } return skb; fail: kfree_skb(skb); return NULL; } static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val) { struct l2cap_conf_opt *opt = *ptr; int len; len = L2CAP_CONF_OPT_SIZE + opt->len; *ptr += len; *type = opt->type; *olen = opt->len; switch (opt->len) { case 1: *val = *((u8 *) opt->val); break; case 2: *val = get_unaligned_le16(opt->val); break; case 4: *val = get_unaligned_le32(opt->val); break; default: *val = (unsigned long) opt->val; break; } BT_DBG("type 0x%2.2x len %u val 0x%lx", *type, opt->len, *val); return len; } static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size) { struct l2cap_conf_opt *opt = *ptr; BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val); if (size < L2CAP_CONF_OPT_SIZE + len) return; opt->type = type; opt->len = len; switch (len) { case 1: *((u8 *) opt->val) = val; break; case 2: put_unaligned_le16(val, opt->val); break; case 4: put_unaligned_le32(val, opt->val); break; default: memcpy(opt->val, (void *) val, len); break; } *ptr += L2CAP_CONF_OPT_SIZE + len; } static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size) { struct l2cap_conf_efs efs; switch (chan->mode) { case L2CAP_MODE_ERTM: efs.id = chan->local_id; efs.stype = chan->local_stype; efs.msdu = cpu_to_le16(chan->local_msdu); efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); efs.acc_lat = cpu_to_le32(L2CAP_DEFAULT_ACC_LAT); efs.flush_to = cpu_to_le32(L2CAP_EFS_DEFAULT_FLUSH_TO); break; case L2CAP_MODE_STREAMING: efs.id = 1; efs.stype = L2CAP_SERV_BESTEFFORT; efs.msdu = cpu_to_le16(chan->local_msdu); efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); efs.acc_lat = 0; efs.flush_to = 0; break; default: return; } l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, size); } static void l2cap_ack_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, ack_timer.work); u16 frames_to_ack; BT_DBG("chan %p", chan); l2cap_chan_lock(chan); frames_to_ack = __seq_offset(chan, chan->buffer_seq, chan->last_acked_seq); if (frames_to_ack) l2cap_send_rr_or_rnr(chan, 0); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } int l2cap_ertm_init(struct l2cap_chan *chan) { int err; chan->next_tx_seq = 0; chan->expected_tx_seq = 0; chan->expected_ack_seq = 0; chan->unacked_frames = 0; chan->buffer_seq = 0; chan->frames_sent = 0; chan->last_acked_seq = 0; chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; skb_queue_head_init(&chan->tx_q); if (chan->mode != L2CAP_MODE_ERTM) return 0; chan->rx_state = L2CAP_RX_STATE_RECV; chan->tx_state = L2CAP_TX_STATE_XMIT; skb_queue_head_init(&chan->srej_q); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); if (err < 0) return err; err = l2cap_seq_list_init(&chan->retrans_list, chan->remote_tx_win); if (err < 0) l2cap_seq_list_free(&chan->srej_list); return err; } static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) { switch (mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (l2cap_mode_supported(mode, remote_feat_mask)) return mode; fallthrough; default: return L2CAP_MODE_BASIC; } } static inline bool __l2cap_ews_supported(struct l2cap_conn *conn) { return (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW); } static inline bool __l2cap_efs_supported(struct l2cap_conn *conn) { return (conn->feat_mask & L2CAP_FEAT_EXT_FLOW); } static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan, struct l2cap_conf_rfc *rfc) { rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); } static inline void l2cap_txwin_setup(struct l2cap_chan *chan) { if (chan->tx_win > L2CAP_DEFAULT_TX_WINDOW && __l2cap_ews_supported(chan->conn)) { /* use extended control field */ set_bit(FLAG_EXT_CTRL, &chan->flags); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; } else { chan->tx_win = min_t(u16, chan->tx_win, L2CAP_DEFAULT_TX_WINDOW); chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; } chan->ack_win = chan->tx_win; } static void l2cap_mtu_auto(struct l2cap_chan *chan) { struct hci_conn *conn = chan->conn->hcon; chan->imtu = L2CAP_DEFAULT_MIN_MTU; /* The 2-DH1 packet has between 2 and 56 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH1)) chan->imtu = 54; /* The 3-DH1 packet has between 2 and 85 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH1)) chan->imtu = 83; /* The 2-DH3 packet has between 2 and 369 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH3)) chan->imtu = 367; /* The 3-DH3 packet has between 2 and 554 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH3)) chan->imtu = 552; /* The 2-DH5 packet has between 2 and 681 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH5)) chan->imtu = 679; /* The 3-DH5 packet has between 2 and 1023 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH5)) chan->imtu = 1021; } static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_req *req = data; struct l2cap_conf_rfc rfc = { .mode = chan->mode }; void *ptr = req->data; void *endptr = data + data_size; u16 size; BT_DBG("chan %p", chan); if (chan->num_conf_req || chan->num_conf_rsp) goto done; switch (chan->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) break; if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); fallthrough; default: chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); break; } done: if (chan->imtu != L2CAP_DEFAULT_MTU) { if (!chan->imtu) l2cap_mtu_auto(chan); l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); } switch (chan->mode) { case L2CAP_MODE_BASIC: if (disable_ertm) break; if (!(chan->conn->feat_mask & L2CAP_FEAT_ERTM) && !(chan->conn->feat_mask & L2CAP_FEAT_STREAMING)) break; rfc.mode = L2CAP_MODE_BASIC; rfc.txwin_size = 0; rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; rfc.max_transmit = chan->max_tx; __l2cap_set_ertm_timeouts(chan, &rfc); size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); l2cap_txwin_setup(chan); rfc.txwin_size = min_t(u16, chan->tx_win, L2CAP_DEFAULT_TX_WINDOW); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs, endptr - ptr); } break; case L2CAP_MODE_STREAMING: l2cap_txwin_setup(chan); rfc.mode = L2CAP_MODE_STREAMING; rfc.txwin_size = 0; rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs, endptr - ptr); } break; } req->dcid = cpu_to_le16(chan->dcid); req->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; void *endptr = data + data_size; void *req = chan->conf_req; int len = chan->conf_len; int type, hint, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; u16 mtu = L2CAP_DEFAULT_MTU; u16 result = L2CAP_CONF_SUCCESS; u16 size; BT_DBG("chan %p", chan); while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); if (len < 0) break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: if (olen != 2) break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: if (olen != 2) break; chan->flush_to = val; break; case L2CAP_CONF_QOS: break; case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: if (olen != 1) break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: if (olen != sizeof(efs)) break; remote_efs = 1; memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: if (olen != 2) break; return -ECONNREFUSED; default: if (hint) break; result = L2CAP_CONF_UNKNOWN; l2cap_add_conf_opt(&ptr, (u8)type, sizeof(u8), type, endptr - ptr); break; } } if (chan->num_conf_rsp || chan->num_conf_req > 1) goto done; switch (chan->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (!test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) { chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); break; } if (remote_efs) { if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); else return -ECONNREFUSED; } if (chan->mode != rfc.mode) return -ECONNREFUSED; break; } done: if (chan->mode != rfc.mode) { result = L2CAP_CONF_UNACCEPT; rfc.mode = chan->mode; if (chan->num_conf_rsp == 1) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); } if (result == L2CAP_CONF_SUCCESS) { /* Configure output options and let the other side know * which ones we don't like. */ if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { chan->omtu = mtu; set_bit(CONF_MTU_DONE, &chan->conf_state); } l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr); if (remote_efs) { if (chan->local_stype != L2CAP_SERV_NOTRAFIC && efs.stype != L2CAP_SERV_NOTRAFIC && efs.stype != chan->local_stype) { result = L2CAP_CONF_UNACCEPT; if (chan->num_conf_req >= 1) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); } else { /* Send PENDING Conf Rsp */ result = L2CAP_CONF_PENDING; set_bit(CONF_LOC_CONF_PEND, &chan->conf_state); } } switch (rfc.mode) { case L2CAP_MODE_BASIC: chan->fcs = L2CAP_FCS_NONE; set_bit(CONF_MODE_DONE, &chan->conf_state); break; case L2CAP_MODE_ERTM: if (!test_bit(CONF_EWS_RECV, &chan->conf_state)) chan->remote_tx_win = rfc.txwin_size; else rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; chan->remote_max_tx = rfc.max_transmit; size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); chan->remote_mps = size; __l2cap_set_ertm_timeouts(chan, &rfc); set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (remote_efs && test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); chan->remote_flush_to = le32_to_cpu(efs.flush_to); chan->remote_acc_lat = le32_to_cpu(efs.acc_lat); chan->remote_sdu_itime = le32_to_cpu(efs.sdu_itime); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); } break; case L2CAP_MODE_STREAMING: size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); chan->remote_mps = size; set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; default: result = L2CAP_CONF_UNACCEPT; memset(&rfc, 0, sizeof(rfc)); rfc.mode = chan->mode; } if (result == L2CAP_CONF_SUCCESS) set_bit(CONF_OUTPUT_DONE, &chan->conf_state); } rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, void *data, size_t size, u16 *result) { struct l2cap_conf_req *req = data; void *ptr = req->data; void *endptr = data + size; int type, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data); while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); if (len < 0) break; switch (type) { case L2CAP_CONF_MTU: if (olen != 2) break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: if (olen != 2) break; chan->flush_to = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; chan->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: if (olen != 2) break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: if (olen != sizeof(efs)) break; memcpy(&efs, (void *)val, olen); if (chan->local_stype != L2CAP_SERV_NOTRAFIC && efs.stype != L2CAP_SERV_NOTRAFIC && efs.stype != chan->local_stype) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: if (olen != 1) break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; } } if (chan->mode == L2CAP_MODE_BASIC && chan->mode != rfc.mode) return -ECONNREFUSED; chan->mode = rfc.mode; if (*result == L2CAP_CONF_SUCCESS || *result == L2CAP_CONF_PENDING) { switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); if (!test_bit(FLAG_EXT_CTRL, &chan->flags)) chan->ack_win = min_t(u16, chan->ack_win, rfc.txwin_size); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->local_msdu = le16_to_cpu(efs.msdu); chan->local_sdu_itime = le32_to_cpu(efs.sdu_itime); chan->local_acc_lat = le32_to_cpu(efs.acc_lat); chan->local_flush_to = le32_to_cpu(efs.flush_to); } break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); } } req->dcid = cpu_to_le16(chan->dcid); req->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_build_conf_rsp(struct l2cap_chan *chan, void *data, u16 result, u16 flags) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; BT_DBG("chan %p", chan); rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(flags); return ptr - data; } void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_le_conn_rsp rsp; struct l2cap_conn *conn = chan->conn; BT_DBG("chan %p", chan); rsp.dcid = cpu_to_le16(chan->scid); rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); } static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) { int *result = data; if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; switch (chan->state) { case BT_CONNECT2: /* If channel still pending accept add to result */ (*result)++; return; case BT_CONNECTED: return; default: /* If not connected or pending accept it has been refused */ *result = -ECONNREFUSED; return; } } struct l2cap_ecred_rsp_data { struct { struct l2cap_ecred_conn_rsp_hdr rsp; __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; int count; }; static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) { struct l2cap_ecred_rsp_data *rsp = data; struct l2cap_ecred_conn_rsp *rsp_flex = container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; /* Reset ident so only one response is sent */ chan->ident = 0; /* Include all channels pending with the same ident */ if (!rsp->pdu.rsp.result) rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid); else l2cap_chan_del(chan, ECONNRESET); } void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_ecred_rsp_data data; u16 id = chan->ident; int result = 0; if (!id) return; BT_DBG("chan %p id %d", chan, id); memset(&data, 0, sizeof(data)); data.pdu.rsp.mtu = cpu_to_le16(chan->imtu); data.pdu.rsp.mps = cpu_to_le16(chan->mps); data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits); data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); /* Verify that all channels are ready */ __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result); if (result > 0) return; if (result < 0) data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION); /* Build response */ __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data); l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP, sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)), &data.pdu); } void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; struct l2cap_conn *conn = chan->conn; u8 buf[128]; u8 rsp_code; rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); rsp_code = L2CAP_CONN_RSP; BT_DBG("chan %p rsp_code %u", chan, rsp_code); l2cap_send_cmd(conn, chan->ident, rsp_code, sizeof(rsp), &rsp); if (test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) return; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) { int type, olen; unsigned long val; /* Use sane default values in case a misbehaving remote device * did not send an RFC or extended window size option. */ u16 txwin_ext = chan->ack_win; struct l2cap_conf_rfc rfc = { .mode = chan->mode, .retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO), .monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO), .max_pdu_size = cpu_to_le16(chan->imtu), .txwin_size = min_t(u16, chan->ack_win, L2CAP_DEFAULT_TX_WINDOW), }; BT_DBG("chan %p, rsp %p, len %d", chan, rsp, len); if ((chan->mode != L2CAP_MODE_ERTM) && (chan->mode != L2CAP_MODE_STREAMING)) return; while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); if (len < 0) break; switch (type) { case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: if (olen != 2) break; txwin_ext = val; break; } } switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) chan->ack_win = min_t(u16, chan->ack_win, txwin_ext); else chan->ack_win = min_t(u16, chan->ack_win, rfc.txwin_size); break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); } } static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data; if (cmd_len < sizeof(*rej)) return -EPROTO; if (rej->reason != L2CAP_REJ_NOT_UNDERSTOOD) return 0; if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); } return 0; } static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data, u8 rsp_code) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct l2cap_chan *chan = NULL, *pchan = NULL; int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", __le16_to_cpu(psm), scid); /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; goto response; } mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && !hci_conn_check_link_mode(conn->hcon)) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; } result = L2CAP_CR_NO_MEM; /* Check for valid dynamic CID range (as per Erratum 3253) */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) { result = L2CAP_CR_INVALID_SCID; goto response; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { result = L2CAP_CR_SCID_IN_USE; goto response; } chan = pchan->ops->new_connection(pchan); if (!chan) goto response; /* For certain devices (ex: HID mouse), support for authentication, * pairing and bonding is optional. For such devices, inorder to avoid * the ACL alive for too long after L2CAP disconnection, reset the ACL * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect. */ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); chan->src_type = bdaddr_src_type(conn->hcon); chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; __l2cap_chan_add(conn, chan); dcid = chan->scid; __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { l2cap_state_change(chan, BT_CONFIG); result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_AUTHEN_PEND; } } else { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_NO_INFO; } response: rsp.scid = cpu_to_le16(scid); rsp.dcid = cpu_to_le16(dcid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp); if (!pchan) return; if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { struct l2cap_info_req info; info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(info), &info); } if (chan && !test_bit(CONF_REQ_SENT, &chan->conf_state) && result == L2CAP_CR_SUCCESS) { u8 buf[128]; set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } l2cap_chan_unlock(pchan); mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); } static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { if (cmd_len < sizeof(struct l2cap_conn_req)) return -EPROTO; l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } static int l2cap_connect_create_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data; u16 scid, dcid, result, status; struct l2cap_chan *chan; u8 req[128]; int err; if (cmd_len < sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); dcid = __le16_to_cpu(rsp->dcid); result = __le16_to_cpu(rsp->result); status = __le16_to_cpu(rsp->status); if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); mutex_lock(&conn->chan_lock); if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); if (!chan) { err = -EBADSLT; goto unlock; } } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) { err = -EBADSLT; goto unlock; } } chan = l2cap_chan_hold_unless_zero(chan); if (!chan) { err = -EBADSLT; goto unlock; } err = 0; l2cap_chan_lock(chan); switch (result) { case L2CAP_CR_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; } l2cap_state_change(chan, BT_CONFIG); chan->ident = 0; chan->dcid = dcid; clear_bit(CONF_CONNECT_PEND, &chan->conf_state); if (test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) break; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, req, sizeof(req)), req); chan->num_conf_req++; break; case L2CAP_CR_PEND: set_bit(CONF_CONNECT_PEND, &chan->conf_state); break; default: l2cap_chan_del(chan, ECONNREFUSED); break; } l2cap_chan_unlock(chan); l2cap_chan_put(chan); unlock: mutex_unlock(&conn->chan_lock); return err; } static inline void set_default_fcs(struct l2cap_chan *chan) { /* FCS is enabled only in ERTM or streaming mode, if one or both * sides request it. */ if (chan->mode != L2CAP_MODE_ERTM && chan->mode != L2CAP_MODE_STREAMING) chan->fcs = L2CAP_FCS_NONE; else if (!test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) chan->fcs = L2CAP_FCS_CRC16; } static void l2cap_send_efs_conf_rsp(struct l2cap_chan *chan, void *data, u8 ident, u16 flags) { struct l2cap_conn *conn = chan->conn; BT_DBG("conn %p chan %p ident %d flags 0x%4.4x", conn, chan, ident, flags); clear_bit(CONF_LOC_CONF_PEND, &chan->conf_state); set_bit(CONF_OUTPUT_DONE, &chan->conf_state); l2cap_send_cmd(conn, ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, data, L2CAP_CONF_SUCCESS, flags), data); } static void cmd_reject_invalid_cid(struct l2cap_conn *conn, u8 ident, u16 scid, u16 dcid) { struct l2cap_cmd_rej_cid rej; rej.reason = cpu_to_le16(L2CAP_REJ_INVALID_CID); rej.scid = __cpu_to_le16(scid); rej.dcid = __cpu_to_le16(dcid); l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; u16 dcid, flags; u8 rsp[64]; struct l2cap_chan *chan; int len, err = 0; if (cmd_len < sizeof(*req)) return -EPROTO; dcid = __le16_to_cpu(req->dcid); flags = __le16_to_cpu(req->flags); BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags); chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { cmd_reject_invalid_cid(conn, cmd->ident, dcid, 0); return 0; } if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 && chan->state != BT_CONNECTED) { cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, chan->dcid); goto unlock; } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); if (chan->conf_len + len > sizeof(chan->conf_req)) { l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, L2CAP_CONF_REJECT, flags), rsp); goto unlock; } /* Store config. */ memcpy(chan->conf_req + chan->conf_len, req->data, len); chan->conf_len += len; if (flags & L2CAP_CONF_FLAG_CONTINUATION) { /* Incomplete config. Send empty response. */ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, L2CAP_CONF_SUCCESS, flags), rsp); goto unlock; } /* Complete config. */ len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp)); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto unlock; } chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; if (!test_bit(CONF_OUTPUT_DONE, &chan->conf_state)) goto unlock; if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); if (chan->mode == L2CAP_MODE_ERTM || chan->mode == L2CAP_MODE_STREAMING) err = l2cap_ertm_init(chan); if (err < 0) l2cap_send_disconn_req(chan, -err); else l2cap_chan_ready(chan); goto unlock; } if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) { u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } /* Got Conf Rsp PENDING from remote side and assume we sent Conf Rsp PENDING in the code above */ if (test_bit(CONF_REM_CONF_PEND, &chan->conf_state) && test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { /* check compatibility */ /* Send rsp for BR/EDR channel */ l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags); } unlock: l2cap_chan_unlock(chan); l2cap_chan_put(chan); return err; } static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data; u16 scid, flags, result; struct l2cap_chan *chan; int len = cmd_len - sizeof(*rsp); int err = 0; if (cmd_len < sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); flags = __le16_to_cpu(rsp->flags); result = __le16_to_cpu(rsp->result); BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x len %d", scid, flags, result, len); chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) return 0; switch (result) { case L2CAP_CONF_SUCCESS: l2cap_conf_rfc_get(chan, rsp->data, len); clear_bit(CONF_REM_CONF_PEND, &chan->conf_state); break; case L2CAP_CONF_PENDING: set_bit(CONF_REM_CONF_PEND, &chan->conf_state); if (test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { char buf[64]; len = l2cap_parse_conf_rsp(chan, rsp->data, len, buf, sizeof(buf), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } l2cap_send_efs_conf_rsp(chan, buf, cmd->ident, 0); } goto done; case L2CAP_CONF_UNKNOWN: case L2CAP_CONF_UNACCEPT: if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { char req[64]; if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(chan, rsp->data, len, req, sizeof(req), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, len, req); chan->num_conf_req++; if (result != L2CAP_CONF_SUCCESS) goto done; break; } fallthrough; default: l2cap_chan_set_err(chan, ECONNRESET); __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); l2cap_send_disconn_req(chan, ECONNRESET); goto done; } if (flags & L2CAP_CONF_FLAG_CONTINUATION) goto done; set_bit(CONF_INPUT_DONE, &chan->conf_state); if (test_bit(CONF_OUTPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); if (chan->mode == L2CAP_MODE_ERTM || chan->mode == L2CAP_MODE_STREAMING) err = l2cap_ertm_init(chan); if (err < 0) l2cap_send_disconn_req(chan, -err); else l2cap_chan_ready(chan); } done: l2cap_chan_unlock(chan); l2cap_chan_put(chan); return err; } static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data; struct l2cap_disconn_rsp rsp; u16 dcid, scid; struct l2cap_chan *chan; if (cmd_len != sizeof(*req)) return -EPROTO; scid = __le16_to_cpu(req->scid); dcid = __le16_to_cpu(req->dcid); BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid); return 0; } rsp.dcid = cpu_to_le16(chan->scid); rsp.scid = cpu_to_le16(chan->dcid); l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); chan->ops->set_shutdown(chan); l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data; u16 dcid, scid; struct l2cap_chan *chan; if (cmd_len != sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); dcid = __le16_to_cpu(rsp->dcid); BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) { return 0; } if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_info_req *req = (struct l2cap_info_req *) data; u16 type; if (cmd_len != sizeof(*req)) return -EPROTO; type = __le16_to_cpu(req->type); BT_DBG("type 0x%4.4x", type); if (type == L2CAP_IT_FEAT_MASK) { u8 buf[8]; u32 feat_mask = l2cap_feat_mask; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; put_unaligned_le32(feat_mask, rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else if (type == L2CAP_IT_FIXED_CHAN) { u8 buf[12]; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); rsp->data[0] = conn->local_fixed_chan; memset(rsp->data + 1, 0, 7); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else { struct l2cap_info_rsp rsp; rsp.type = cpu_to_le16(type); rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp); } return 0; } static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; u16 type, result; if (cmd_len < sizeof(*rsp)) return -EPROTO; type = __le16_to_cpu(rsp->type); result = __le16_to_cpu(rsp->result); BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); /* L2CAP Info req/rsp are unbound to channels, add extra checks */ if (cmd->ident != conn->info_ident || conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); return 0; } switch (type) { case L2CAP_IT_FEAT_MASK: conn->feat_mask = get_unaligned_le32(rsp->data); if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { struct l2cap_info_req req; req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); conn->info_ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(req), &req); } else { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); } break; case L2CAP_IT_FIXED_CHAN: conn->remote_fixed_chan = rsp->data[0]; conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); break; } return 0; } static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct hci_conn *hcon = conn->hcon; struct l2cap_conn_param_update_req *req; struct l2cap_conn_param_update_rsp rsp; u16 min, max, latency, to_multiplier; int err; if (hcon->role != HCI_ROLE_MASTER) return -EINVAL; if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) return -EPROTO; req = (struct l2cap_conn_param_update_req *) data; min = __le16_to_cpu(req->min); max = __le16_to_cpu(req->max); latency = __le16_to_cpu(req->latency); to_multiplier = __le16_to_cpu(req->to_multiplier); BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x", min, max, latency, to_multiplier); memset(&rsp, 0, sizeof(rsp)); err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED); l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); if (!err) { u8 store_hint; store_hint = hci_le_conn_update(hcon, min, max, latency, to_multiplier); mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, store_hint, min, max, latency, to_multiplier); } return 0; } static int l2cap_le_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_le_conn_rsp *rsp = (struct l2cap_le_conn_rsp *) data; struct hci_conn *hcon = conn->hcon; u16 dcid, mtu, mps, credits, result; struct l2cap_chan *chan; int err, sec_level; if (cmd_len < sizeof(*rsp)) return -EPROTO; dcid = __le16_to_cpu(rsp->dcid); mtu = __le16_to_cpu(rsp->mtu); mps = __le16_to_cpu(rsp->mps); credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 || dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", dcid, mtu, mps, credits, result); mutex_lock(&conn->chan_lock); chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) { err = -EBADSLT; goto unlock; } err = 0; l2cap_chan_lock(chan); switch (result) { case L2CAP_CR_LE_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; } chan->ident = 0; chan->dcid = dcid; chan->omtu = mtu; chan->remote_mps = mps; chan->tx_credits = credits; l2cap_chan_ready(chan); break; case L2CAP_CR_LE_AUTHENTICATION: case L2CAP_CR_LE_ENCRYPTION: /* If we already have MITM protection we can't do * anything. */ if (hcon->sec_level > BT_SECURITY_MEDIUM) { l2cap_chan_del(chan, ECONNREFUSED); break; } sec_level = hcon->sec_level + 1; if (chan->sec_level < sec_level) chan->sec_level = sec_level; /* We'll need to send a new Connect Request */ clear_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags); smp_conn_security(hcon, chan->sec_level); break; default: l2cap_chan_del(chan, ECONNREFUSED); break; } l2cap_chan_unlock(chan); unlock: mutex_unlock(&conn->chan_lock); return err; } static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { int err = 0; switch (cmd->code) { case L2CAP_COMMAND_REJ: l2cap_command_rej(conn, cmd, cmd_len, data); break; case L2CAP_CONN_REQ: err = l2cap_connect_req(conn, cmd, cmd_len, data); break; case L2CAP_CONN_RSP: l2cap_connect_create_rsp(conn, cmd, cmd_len, data); break; case L2CAP_CONF_REQ: err = l2cap_config_req(conn, cmd, cmd_len, data); break; case L2CAP_CONF_RSP: l2cap_config_rsp(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_RSP: l2cap_disconnect_rsp(conn, cmd, cmd_len, data); break; case L2CAP_ECHO_REQ: l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data); break; case L2CAP_ECHO_RSP: break; case L2CAP_INFO_REQ: err = l2cap_information_req(conn, cmd, cmd_len, data); break; case L2CAP_INFO_RSP: l2cap_information_rsp(conn, cmd, cmd_len, data); break; default: BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); err = -EINVAL; break; } return err; } static int l2cap_le_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_le_conn_req *req = (struct l2cap_le_conn_req *) data; struct l2cap_le_conn_rsp rsp; struct l2cap_chan *chan, *pchan; u16 dcid, scid, credits, mtu, mps; __le16 psm; u8 result; if (cmd_len != sizeof(*req)) return -EPROTO; scid = __le16_to_cpu(req->scid); mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); psm = req->psm; dcid = 0; credits = 0; if (mtu < 23 || mps < 23) return -EPROTO; BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A * page 1059: * * Valid range: 0x0001-0x00ff * * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges */ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { result = L2CAP_CR_LE_BAD_PSM; chan = NULL; goto response; } /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); if (!pchan) { result = L2CAP_CR_LE_BAD_PSM; chan = NULL; goto response; } mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { result = L2CAP_CR_LE_AUTHENTICATION; chan = NULL; goto response_unlock; } /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { result = L2CAP_CR_LE_INVALID_SCID; chan = NULL; goto response_unlock; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { result = L2CAP_CR_LE_SCID_IN_USE; chan = NULL; goto response_unlock; } chan = pchan->ops->new_connection(pchan); if (!chan) { result = L2CAP_CR_LE_NO_MEM; goto response_unlock; } bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); chan->src_type = bdaddr_src_type(conn->hcon); chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; chan->remote_mps = mps; __l2cap_chan_add(conn, chan); l2cap_le_flowctl_init(chan, __le16_to_cpu(req->credits)); dcid = chan->scid; credits = chan->rx_credits; __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); /* The following result value is actually not defined * for LE CoC but we use it to let the function know * that it should bail out after doing its cleanup * instead of sending a response. */ result = L2CAP_CR_PEND; chan->ops->defer(chan); } else { l2cap_chan_ready(chan); result = L2CAP_CR_LE_SUCCESS; } response_unlock: l2cap_chan_unlock(pchan); mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); if (result == L2CAP_CR_PEND) return 0; response: if (chan) { rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); } else { rsp.mtu = 0; rsp.mps = 0; } rsp.dcid = cpu_to_le16(dcid); rsp.credits = cpu_to_le16(credits); rsp.result = cpu_to_le16(result); l2cap_send_cmd(conn, cmd->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); return 0; } static inline int l2cap_le_credits(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_le_credits *pkt; struct l2cap_chan *chan; u16 cid, credits, max_credits; if (cmd_len != sizeof(*pkt)) return -EPROTO; pkt = (struct l2cap_le_credits *) data; cid = __le16_to_cpu(pkt->cid); credits = __le16_to_cpu(pkt->credits); BT_DBG("cid 0x%4.4x credits 0x%4.4x", cid, credits); chan = l2cap_get_chan_by_dcid(conn, cid); if (!chan) return -EBADSLT; max_credits = LE_FLOWCTL_MAX_CREDITS - chan->tx_credits; if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); /* Return 0 so that we don't trigger an unnecessary * command reject packet. */ goto unlock; } chan->tx_credits += credits; /* Resume sending */ l2cap_le_flowctl_send(chan); if (chan->tx_credits) chan->ops->resume(chan); unlock: l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_ecred_conn_req *req = (void *) data; DEFINE_RAW_FLEX(struct l2cap_ecred_conn_rsp, pdu, dcid, L2CAP_ECRED_MAX_CID); struct l2cap_chan *chan, *pchan; u16 mtu, mps; __le16 psm; u8 result, len = 0; int i, num_scid; bool defer = false; if (!enable_ecred) return -EINVAL; if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { result = L2CAP_CR_LE_UNACCEPT_PARAMS; goto response; } psm = req->psm; /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A * page 1059: * * Valid range: 0x0001-0x00ff * * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges */ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { result = L2CAP_CR_LE_BAD_PSM; goto response; } BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(pdu, 0, sizeof(*pdu)); /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); if (!pchan) { result = L2CAP_CR_LE_BAD_PSM; goto response; } mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { result = L2CAP_CR_LE_AUTHENTICATION; goto unlock; } result = L2CAP_CR_LE_SUCCESS; for (i = 0; i < num_scid; i++) { u16 scid = __le16_to_cpu(req->scid[i]); BT_DBG("scid[%d] 0x%4.4x", i, scid); pdu->dcid[i] = 0x0000; len += sizeof(*pdu->dcid); /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { result = L2CAP_CR_LE_INVALID_SCID; continue; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { result = L2CAP_CR_LE_SCID_IN_USE; continue; } chan = pchan->ops->new_connection(pchan); if (!chan) { result = L2CAP_CR_LE_NO_MEM; continue; } bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); chan->src_type = bdaddr_src_type(conn->hcon); chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; chan->remote_mps = mps; __l2cap_chan_add(conn, chan); l2cap_ecred_init(chan, __le16_to_cpu(req->credits)); /* Init response */ if (!pdu->credits) { pdu->mtu = cpu_to_le16(chan->imtu); pdu->mps = cpu_to_le16(chan->mps); pdu->credits = cpu_to_le16(chan->rx_credits); } pdu->dcid[i] = cpu_to_le16(chan->scid); __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; chan->mode = L2CAP_MODE_EXT_FLOWCTL; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); defer = true; chan->ops->defer(chan); } else { l2cap_chan_ready(chan); } } unlock: l2cap_chan_unlock(pchan); mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); response: pdu->result = cpu_to_le16(result); if (defer) return 0; l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, sizeof(*pdu) + len, pdu); return 0; } static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_ecred_conn_rsp *rsp = (void *) data; struct hci_conn *hcon = conn->hcon; u16 mtu, mps, credits, result; struct l2cap_chan *chan, *tmp; int err = 0, sec_level; int i = 0; if (cmd_len < sizeof(*rsp)) return -EPROTO; mtu = __le16_to_cpu(rsp->mtu); mps = __le16_to_cpu(rsp->mps); credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, result); mutex_lock(&conn->chan_lock); cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { u16 dcid; if (chan->ident != cmd->ident || chan->mode != L2CAP_MODE_EXT_FLOWCTL || chan->state == BT_CONNECTED) continue; l2cap_chan_lock(chan); /* Check that there is a dcid for each pending channel */ if (cmd_len < sizeof(dcid)) { l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); continue; } dcid = __le16_to_cpu(rsp->dcid[i++]); cmd_len -= sizeof(u16); BT_DBG("dcid[%d] 0x%4.4x", i, dcid); /* Check if dcid is already in use */ if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { /* If a device receives a * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an * already-assigned Destination CID, then both the * original channel and the new channel shall be * immediately discarded and not used. */ l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); chan = __l2cap_get_chan_by_dcid(conn, dcid); l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); l2cap_chan_unlock(chan); continue; } switch (result) { case L2CAP_CR_LE_AUTHENTICATION: case L2CAP_CR_LE_ENCRYPTION: /* If we already have MITM protection we can't do * anything. */ if (hcon->sec_level > BT_SECURITY_MEDIUM) { l2cap_chan_del(chan, ECONNREFUSED); break; } sec_level = hcon->sec_level + 1; if (chan->sec_level < sec_level) chan->sec_level = sec_level; /* We'll need to send a new Connect Request */ clear_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags); smp_conn_security(hcon, chan->sec_level); break; case L2CAP_CR_LE_BAD_PSM: l2cap_chan_del(chan, ECONNREFUSED); break; default: /* If dcid was not set it means channels was refused */ if (!dcid) { l2cap_chan_del(chan, ECONNREFUSED); break; } chan->ident = 0; chan->dcid = dcid; chan->omtu = mtu; chan->remote_mps = mps; chan->tx_credits = credits; l2cap_chan_ready(chan); break; } l2cap_chan_unlock(chan); } mutex_unlock(&conn->chan_lock); return err; } static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_ecred_reconf_req *req = (void *) data; struct l2cap_ecred_reconf_rsp rsp; u16 mtu, mps, result; struct l2cap_chan *chan; int i, num_scid; if (!enable_ecred) return -EINVAL; if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto respond; } mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); BT_DBG("mtu %u mps %u", mtu, mps); if (mtu < L2CAP_ECRED_MIN_MTU) { result = L2CAP_RECONF_INVALID_MTU; goto respond; } if (mps < L2CAP_ECRED_MIN_MPS) { result = L2CAP_RECONF_INVALID_MPS; goto respond; } cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); result = L2CAP_RECONF_SUCCESS; for (i = 0; i < num_scid; i++) { u16 scid; scid = __le16_to_cpu(req->scid[i]); if (!scid) return -EPROTO; chan = __l2cap_get_chan_by_dcid(conn, scid); if (!chan) continue; /* If the MTU value is decreased for any of the included * channels, then the receiver shall disconnect all * included channels. */ if (chan->omtu > mtu) { BT_ERR("chan %p decreased MTU %u -> %u", chan, chan->omtu, mtu); result = L2CAP_RECONF_INVALID_MTU; } chan->omtu = mtu; chan->remote_mps = mps; } respond: rsp.result = cpu_to_le16(result); l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_RECONF_RSP, sizeof(rsp), &rsp); return 0; } static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_chan *chan, *tmp; struct l2cap_ecred_conn_rsp *rsp = (void *) data; u16 result; if (cmd_len < sizeof(*rsp)) return -EPROTO; result = __le16_to_cpu(rsp->result); BT_DBG("result 0x%4.4x", rsp->result); if (!result) return 0; list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { if (chan->ident != cmd->ident) continue; l2cap_chan_del(chan, ECONNRESET); } return 0; } static inline int l2cap_le_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data; struct l2cap_chan *chan; if (cmd_len < sizeof(*rej)) return -EPROTO; mutex_lock(&conn->chan_lock); chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) goto done; chan = l2cap_chan_hold_unless_zero(chan); if (!chan) goto done; l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); l2cap_chan_put(chan); done: mutex_unlock(&conn->chan_lock); return 0; } static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { int err = 0; switch (cmd->code) { case L2CAP_COMMAND_REJ: l2cap_le_command_rej(conn, cmd, cmd_len, data); break; case L2CAP_CONN_PARAM_UPDATE_REQ: err = l2cap_conn_param_update_req(conn, cmd, cmd_len, data); break; case L2CAP_CONN_PARAM_UPDATE_RSP: break; case L2CAP_LE_CONN_RSP: l2cap_le_connect_rsp(conn, cmd, cmd_len, data); break; case L2CAP_LE_CONN_REQ: err = l2cap_le_connect_req(conn, cmd, cmd_len, data); break; case L2CAP_LE_CREDITS: err = l2cap_le_credits(conn, cmd, cmd_len, data); break; case L2CAP_ECRED_CONN_REQ: err = l2cap_ecred_conn_req(conn, cmd, cmd_len, data); break; case L2CAP_ECRED_CONN_RSP: err = l2cap_ecred_conn_rsp(conn, cmd, cmd_len, data); break; case L2CAP_ECRED_RECONF_REQ: err = l2cap_ecred_reconf_req(conn, cmd, cmd_len, data); break; case L2CAP_ECRED_RECONF_RSP: err = l2cap_ecred_reconf_rsp(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_RSP: l2cap_disconnect_rsp(conn, cmd, cmd_len, data); break; default: BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code); err = -EINVAL; break; } return err; } static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; struct l2cap_cmd_hdr *cmd; u16 len; int err; if (hcon->type != LE_LINK) goto drop; if (skb->len < L2CAP_CMD_HDR_SIZE) goto drop; cmd = (void *) skb->data; skb_pull(skb, L2CAP_CMD_HDR_SIZE); len = le16_to_cpu(cmd->len); BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd->code, len, cmd->ident); if (len != skb->len || !cmd->ident) { BT_DBG("corrupted command"); goto drop; } err = l2cap_le_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; BT_ERR("Wrong link type (%d)", err); rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } drop: kfree_skb(skb); } static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) { struct l2cap_cmd_rej_unk rej; rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; struct l2cap_cmd_hdr *cmd; int err; l2cap_raw_recv(conn, skb); if (hcon->type != ACL_LINK) goto drop; while (skb->len >= L2CAP_CMD_HDR_SIZE) { u16 len; cmd = (void *) skb->data; skb_pull(skb, L2CAP_CMD_HDR_SIZE); len = le16_to_cpu(cmd->len); BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd->code, len, cmd->ident); if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); l2cap_sig_send_rej(conn, cmd->ident); skb_pull(skb, len > skb->len ? skb->len : len); continue; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { BT_ERR("Wrong link type (%d)", err); l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } if (skb->len > 0) { BT_DBG("corrupted command"); l2cap_sig_send_rej(conn, 0); } drop: kfree_skb(skb); } static int l2cap_check_fcs(struct l2cap_chan *chan, struct sk_buff *skb) { u16 our_fcs, rcv_fcs; int hdr_size; if (test_bit(FLAG_EXT_CTRL, &chan->flags)) hdr_size = L2CAP_EXT_HDR_SIZE; else hdr_size = L2CAP_ENH_HDR_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) { skb_trim(skb, skb->len - L2CAP_FCS_SIZE); rcv_fcs = get_unaligned_le16(skb->data + skb->len); our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); if (our_fcs != rcv_fcs) return -EBADMSG; } return 0; } static void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan) { struct l2cap_ctrl control; BT_DBG("chan %p", chan); memset(&control, 0, sizeof(control)); control.sframe = 1; control.final = 1; control.reqseq = chan->buffer_seq; set_bit(CONN_SEND_FBIT, &chan->conn_state); if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { control.super = L2CAP_SUPER_RNR; l2cap_send_sframe(chan, &control); } if (test_and_clear_bit(CONN_REMOTE_BUSY, &chan->conn_state) && chan->unacked_frames > 0) __set_retrans_timer(chan); /* Send pending iframes */ l2cap_ertm_send(chan); if (!test_bit(CONN_LOCAL_BUSY, &chan->conn_state) && test_bit(CONN_SEND_FBIT, &chan->conn_state)) { /* F-bit wasn't sent in an s-frame or i-frame yet, so * send it now. */ control.super = L2CAP_SUPER_RR; l2cap_send_sframe(chan, &control); } } static void append_skb_frag(struct sk_buff *skb, struct sk_buff *new_frag, struct sk_buff **last_frag) { /* skb->len reflects data in skb as well as all fragments * skb->data_len reflects only data in fragments */ if (!skb_has_frag_list(skb)) skb_shinfo(skb)->frag_list = new_frag; new_frag->next = NULL; (*last_frag)->next = new_frag; *last_frag = new_frag; skb->len += new_frag->len; skb->data_len += new_frag->len; skb->truesize += new_frag->truesize; } static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, struct l2cap_ctrl *control) { int err = -EINVAL; switch (control->sar) { case L2CAP_SAR_UNSEGMENTED: if (chan->sdu) break; err = chan->ops->recv(chan, skb); break; case L2CAP_SAR_START: if (chan->sdu) break; if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) break; chan->sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); if (chan->sdu_len > chan->imtu) { err = -EMSGSIZE; break; } if (skb->len >= chan->sdu_len) break; chan->sdu = skb; chan->sdu_last_frag = skb; skb = NULL; err = 0; break; case L2CAP_SAR_CONTINUE: if (!chan->sdu) break; append_skb_frag(chan->sdu, skb, &chan->sdu_last_frag); skb = NULL; if (chan->sdu->len >= chan->sdu_len) break; err = 0; break; case L2CAP_SAR_END: if (!chan->sdu) break; append_skb_frag(chan->sdu, skb, &chan->sdu_last_frag); skb = NULL; if (chan->sdu->len != chan->sdu_len) break; err = chan->ops->recv(chan, chan->sdu); if (!err) { /* Reassembly complete */ chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; } break; } if (err) { kfree_skb(skb); kfree_skb(chan->sdu); chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; } return err; } static int l2cap_resegment(struct l2cap_chan *chan) { /* Placeholder */ return 0; } void l2cap_chan_busy(struct l2cap_chan *chan, int busy) { u8 event; if (chan->mode != L2CAP_MODE_ERTM) return; event = busy ? L2CAP_EV_LOCAL_BUSY_DETECTED : L2CAP_EV_LOCAL_BUSY_CLEAR; l2cap_tx(chan, NULL, NULL, event); } static int l2cap_rx_queued_iframes(struct l2cap_chan *chan) { int err = 0; /* Pass sequential frames to l2cap_reassemble_sdu() * until a gap is encountered. */ BT_DBG("chan %p", chan); while (!test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { struct sk_buff *skb; BT_DBG("Searching for skb with txseq %d (queue len %d)", chan->buffer_seq, skb_queue_len(&chan->srej_q)); skb = l2cap_ertm_seq_in_queue(&chan->srej_q, chan->buffer_seq); if (!skb) break; skb_unlink(skb, &chan->srej_q); chan->buffer_seq = __next_seq(chan, chan->buffer_seq); err = l2cap_reassemble_sdu(chan, skb, &bt_cb(skb)->l2cap); if (err) break; } if (skb_queue_empty(&chan->srej_q)) { chan->rx_state = L2CAP_RX_STATE_RECV; l2cap_send_ack(chan); } return err; } static void l2cap_handle_srej(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; BT_DBG("chan %p, control %p", chan, control); if (control->reqseq == chan->next_tx_seq) { BT_DBG("Invalid reqseq %d, disconnecting", control->reqseq); l2cap_send_disconn_req(chan, ECONNRESET); return; } skb = l2cap_ertm_seq_in_queue(&chan->tx_q, control->reqseq); if (skb == NULL) { BT_DBG("Seq %d not available for retransmission", control->reqseq); return; } if (chan->max_tx != 0 && bt_cb(skb)->l2cap.retries >= chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); return; } clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); if (control->poll) { l2cap_pass_to_tx(chan, control); set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_retransmit(chan, control); l2cap_ertm_send(chan); if (chan->tx_state == L2CAP_TX_STATE_WAIT_F) { set_bit(CONN_SREJ_ACT, &chan->conn_state); chan->srej_save_reqseq = control->reqseq; } } else { l2cap_pass_to_tx_fbit(chan, control); if (control->final) { if (chan->srej_save_reqseq != control->reqseq || !test_and_clear_bit(CONN_SREJ_ACT, &chan->conn_state)) l2cap_retransmit(chan, control); } else { l2cap_retransmit(chan, control); if (chan->tx_state == L2CAP_TX_STATE_WAIT_F) { set_bit(CONN_SREJ_ACT, &chan->conn_state); chan->srej_save_reqseq = control->reqseq; } } } } static void l2cap_handle_rej(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; BT_DBG("chan %p, control %p", chan, control); if (control->reqseq == chan->next_tx_seq) { BT_DBG("Invalid reqseq %d, disconnecting", control->reqseq); l2cap_send_disconn_req(chan, ECONNRESET); return; } skb = l2cap_ertm_seq_in_queue(&chan->tx_q, control->reqseq); if (chan->max_tx && skb && bt_cb(skb)->l2cap.retries >= chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); return; } clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); l2cap_pass_to_tx(chan, control); if (control->final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) l2cap_retransmit_all(chan, control); } else { l2cap_retransmit_all(chan, control); l2cap_ertm_send(chan); if (chan->tx_state == L2CAP_TX_STATE_WAIT_F) set_bit(CONN_REJ_ACT, &chan->conn_state); } } static u8 l2cap_classify_txseq(struct l2cap_chan *chan, u16 txseq) { BT_DBG("chan %p, txseq %d", chan, txseq); BT_DBG("last_acked_seq %d, expected_tx_seq %d", chan->last_acked_seq, chan->expected_tx_seq); if (chan->rx_state == L2CAP_RX_STATE_SREJ_SENT) { if (__seq_offset(chan, txseq, chan->last_acked_seq) >= chan->tx_win) { /* See notes below regarding "double poll" and * invalid packets. */ if (chan->tx_win <= ((chan->tx_win_max + 1) >> 1)) { BT_DBG("Invalid/Ignore - after SREJ"); return L2CAP_TXSEQ_INVALID_IGNORE; } else { BT_DBG("Invalid - in window after SREJ sent"); return L2CAP_TXSEQ_INVALID; } } if (chan->srej_list.head == txseq) { BT_DBG("Expected SREJ"); return L2CAP_TXSEQ_EXPECTED_SREJ; } if (l2cap_ertm_seq_in_queue(&chan->srej_q, txseq)) { BT_DBG("Duplicate SREJ - txseq already stored"); return L2CAP_TXSEQ_DUPLICATE_SREJ; } if (l2cap_seq_list_contains(&chan->srej_list, txseq)) { BT_DBG("Unexpected SREJ - not requested"); return L2CAP_TXSEQ_UNEXPECTED_SREJ; } } if (chan->expected_tx_seq == txseq) { if (__seq_offset(chan, txseq, chan->last_acked_seq) >= chan->tx_win) { BT_DBG("Invalid - txseq outside tx window"); return L2CAP_TXSEQ_INVALID; } else { BT_DBG("Expected"); return L2CAP_TXSEQ_EXPECTED; } } if (__seq_offset(chan, txseq, chan->last_acked_seq) < __seq_offset(chan, chan->expected_tx_seq, chan->last_acked_seq)) { BT_DBG("Duplicate - expected_tx_seq later than txseq"); return L2CAP_TXSEQ_DUPLICATE; } if (__seq_offset(chan, txseq, chan->last_acked_seq) >= chan->tx_win) { /* A source of invalid packets is a "double poll" condition, * where delays cause us to send multiple poll packets. If * the remote stack receives and processes both polls, * sequence numbers can wrap around in such a way that a * resent frame has a sequence number that looks like new data * with a sequence gap. This would trigger an erroneous SREJ * request. * * Fortunately, this is impossible with a tx window that's * less than half of the maximum sequence number, which allows * invalid frames to be safely ignored. * * With tx window sizes greater than half of the tx window * maximum, the frame is invalid and cannot be ignored. This * causes a disconnect. */ if (chan->tx_win <= ((chan->tx_win_max + 1) >> 1)) { BT_DBG("Invalid/Ignore - txseq outside tx window"); return L2CAP_TXSEQ_INVALID_IGNORE; } else { BT_DBG("Invalid - txseq outside tx window"); return L2CAP_TXSEQ_INVALID; } } else { BT_DBG("Unexpected - txseq indicates missing frames"); return L2CAP_TXSEQ_UNEXPECTED; } } static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; BT_DBG("chan %p, control %p, skb %p, event %d", chan, control, skb, event); switch (event) { case L2CAP_EV_RECV_IFRAME: switch (l2cap_classify_txseq(chan, control->txseq)) { case L2CAP_TXSEQ_EXPECTED: l2cap_pass_to_tx(chan, control); if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { BT_DBG("Busy, discarding expected seq %d", control->txseq); break; } chan->expected_tx_seq = __next_seq(chan, control->txseq); chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; /* l2cap_reassemble_sdu may free skb, hence invalidate * control, so make a copy in advance to use it after * l2cap_reassemble_sdu returns and to avoid the race * condition, for example: * * The current thread calls: * l2cap_reassemble_sdu * chan->ops->recv == l2cap_sock_recv_cb * __sock_queue_rcv_skb * Another thread calls: * bt_sock_recvmsg * skb_recv_datagram * skb_free_datagram * Then the current thread tries to access control, but * it was freed by skb_free_datagram. */ local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { local_control.final = 0; l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } if (!test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) l2cap_send_ack(chan); break; case L2CAP_TXSEQ_UNEXPECTED: l2cap_pass_to_tx(chan, control); /* Can't issue SREJ frames in the local busy state. * Drop this frame, it will be seen as missing * when local busy is exited. */ if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { BT_DBG("Busy, discarding unexpected seq %d", control->txseq); break; } /* There was a gap in the sequence, so an SREJ * must be sent for each missing frame. The * current frame is stored for later use. */ skb_queue_tail(&chan->srej_q, skb); skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); clear_bit(CONN_SREJ_ACT, &chan->conn_state); l2cap_seq_list_clear(&chan->srej_list); l2cap_send_srej(chan, control->txseq); chan->rx_state = L2CAP_RX_STATE_SREJ_SENT; break; case L2CAP_TXSEQ_DUPLICATE: l2cap_pass_to_tx(chan, control); break; case L2CAP_TXSEQ_INVALID_IGNORE: break; case L2CAP_TXSEQ_INVALID: default: l2cap_send_disconn_req(chan, ECONNRESET); break; } break; case L2CAP_EV_RECV_RR: l2cap_pass_to_tx(chan, control); if (control->final) { clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { control->final = 0; l2cap_retransmit_all(chan, control); } l2cap_ertm_send(chan); } else if (control->poll) { l2cap_send_i_or_rr_or_rnr(chan); } else { if (test_and_clear_bit(CONN_REMOTE_BUSY, &chan->conn_state) && chan->unacked_frames) __set_retrans_timer(chan); l2cap_ertm_send(chan); } break; case L2CAP_EV_RECV_RNR: set_bit(CONN_REMOTE_BUSY, &chan->conn_state); l2cap_pass_to_tx(chan, control); if (control && control->poll) { set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_send_rr_or_rnr(chan, 0); } __clear_retrans_timer(chan); l2cap_seq_list_clear(&chan->retrans_list); break; case L2CAP_EV_RECV_REJ: l2cap_handle_rej(chan, control); break; case L2CAP_EV_RECV_SREJ: l2cap_handle_srej(chan, control); break; default: break; } if (skb && !skb_in_use) { BT_DBG("Freeing %p", skb); kfree_skb(skb); } return err; } static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { int err = 0; u16 txseq = control->txseq; bool skb_in_use = false; BT_DBG("chan %p, control %p, skb %p, event %d", chan, control, skb, event); switch (event) { case L2CAP_EV_RECV_IFRAME: switch (l2cap_classify_txseq(chan, txseq)) { case L2CAP_TXSEQ_EXPECTED: /* Keep frame for reassembly later */ l2cap_pass_to_tx(chan, control); skb_queue_tail(&chan->srej_q, skb); skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); chan->expected_tx_seq = __next_seq(chan, txseq); break; case L2CAP_TXSEQ_EXPECTED_SREJ: l2cap_seq_list_pop(&chan->srej_list); l2cap_pass_to_tx(chan, control); skb_queue_tail(&chan->srej_q, skb); skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); err = l2cap_rx_queued_iframes(chan); if (err) break; break; case L2CAP_TXSEQ_UNEXPECTED: /* Got a frame that can't be reassembled yet. * Save it for later, and send SREJs to cover * the missing frames. */ skb_queue_tail(&chan->srej_q, skb); skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); l2cap_pass_to_tx(chan, control); l2cap_send_srej(chan, control->txseq); break; case L2CAP_TXSEQ_UNEXPECTED_SREJ: /* This frame was requested with an SREJ, but * some expected retransmitted frames are * missing. Request retransmission of missing * SREJ'd frames. */ skb_queue_tail(&chan->srej_q, skb); skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); l2cap_pass_to_tx(chan, control); l2cap_send_srej_list(chan, control->txseq); break; case L2CAP_TXSEQ_DUPLICATE_SREJ: /* We've already queued this frame. Drop this copy. */ l2cap_pass_to_tx(chan, control); break; case L2CAP_TXSEQ_DUPLICATE: /* Expecting a later sequence number, so this frame * was already received. Ignore it completely. */ break; case L2CAP_TXSEQ_INVALID_IGNORE: break; case L2CAP_TXSEQ_INVALID: default: l2cap_send_disconn_req(chan, ECONNRESET); break; } break; case L2CAP_EV_RECV_RR: l2cap_pass_to_tx(chan, control); if (control->final) { clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { control->final = 0; l2cap_retransmit_all(chan, control); } l2cap_ertm_send(chan); } else if (control->poll) { if (test_and_clear_bit(CONN_REMOTE_BUSY, &chan->conn_state) && chan->unacked_frames) { __set_retrans_timer(chan); } set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_send_srej_tail(chan); } else { if (test_and_clear_bit(CONN_REMOTE_BUSY, &chan->conn_state) && chan->unacked_frames) __set_retrans_timer(chan); l2cap_send_ack(chan); } break; case L2CAP_EV_RECV_RNR: set_bit(CONN_REMOTE_BUSY, &chan->conn_state); l2cap_pass_to_tx(chan, control); if (control->poll) { l2cap_send_srej_tail(chan); } else { struct l2cap_ctrl rr_control; memset(&rr_control, 0, sizeof(rr_control)); rr_control.sframe = 1; rr_control.super = L2CAP_SUPER_RR; rr_control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &rr_control); } break; case L2CAP_EV_RECV_REJ: l2cap_handle_rej(chan, control); break; case L2CAP_EV_RECV_SREJ: l2cap_handle_srej(chan, control); break; } if (skb && !skb_in_use) { BT_DBG("Freeing %p", skb); kfree_skb(skb); } return err; } static int l2cap_finish_move(struct l2cap_chan *chan) { BT_DBG("chan %p", chan); chan->rx_state = L2CAP_RX_STATE_RECV; chan->conn->mtu = chan->conn->hcon->mtu; return l2cap_resegment(chan); } static int l2cap_rx_state_wait_p(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { int err; BT_DBG("chan %p, control %p, skb %p, event %d", chan, control, skb, event); if (!control->poll) return -EPROTO; l2cap_process_reqseq(chan, control->reqseq); if (!skb_queue_empty(&chan->tx_q)) chan->tx_send_head = skb_peek(&chan->tx_q); else chan->tx_send_head = NULL; /* Rewind next_tx_seq to the point expected * by the receiver. */ chan->next_tx_seq = control->reqseq; chan->unacked_frames = 0; err = l2cap_finish_move(chan); if (err) return err; set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_send_i_or_rr_or_rnr(chan); if (event == L2CAP_EV_RECV_IFRAME) return -EPROTO; return l2cap_rx_state_recv(chan, control, NULL, event); } static int l2cap_rx_state_wait_f(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { int err; if (!control->final) return -EPROTO; clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); chan->rx_state = L2CAP_RX_STATE_RECV; l2cap_process_reqseq(chan, control->reqseq); if (!skb_queue_empty(&chan->tx_q)) chan->tx_send_head = skb_peek(&chan->tx_q); else chan->tx_send_head = NULL; /* Rewind next_tx_seq to the point expected * by the receiver. */ chan->next_tx_seq = control->reqseq; chan->unacked_frames = 0; chan->conn->mtu = chan->conn->hcon->mtu; err = l2cap_resegment(chan); if (!err) err = l2cap_rx_state_recv(chan, control, skb, event); return err; } static bool __valid_reqseq(struct l2cap_chan *chan, u16 reqseq) { /* Make sure reqseq is for a packet that has been sent but not acked */ u16 unacked; unacked = __seq_offset(chan, chan->next_tx_seq, chan->expected_ack_seq); return __seq_offset(chan, chan->next_tx_seq, reqseq) <= unacked; } static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { int err = 0; BT_DBG("chan %p, control %p, skb %p, event %d, state %d", chan, control, skb, event, chan->rx_state); if (__valid_reqseq(chan, control->reqseq)) { switch (chan->rx_state) { case L2CAP_RX_STATE_RECV: err = l2cap_rx_state_recv(chan, control, skb, event); break; case L2CAP_RX_STATE_SREJ_SENT: err = l2cap_rx_state_srej_sent(chan, control, skb, event); break; case L2CAP_RX_STATE_WAIT_P: err = l2cap_rx_state_wait_p(chan, control, skb, event); break; case L2CAP_RX_STATE_WAIT_F: err = l2cap_rx_state_wait_f(chan, control, skb, event); break; default: /* shut it down */ break; } } else { BT_DBG("Invalid reqseq %d (next_tx_seq %d, expected_ack_seq %d", control->reqseq, chan->next_tx_seq, chan->expected_ack_seq); l2cap_send_disconn_req(chan, ECONNRESET); } return err; } static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store * the txseq field in advance to use it after l2cap_reassemble_sdu * returns and to avoid the race condition, for example: * * The current thread calls: * l2cap_reassemble_sdu * chan->ops->recv == l2cap_sock_recv_cb * __sock_queue_rcv_skb * Another thread calls: * bt_sock_recvmsg * skb_recv_datagram * skb_free_datagram * Then the current thread tries to access control, but it was freed by * skb_free_datagram. */ u16 txseq = control->txseq; BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %u->%u", chan->buffer_seq, __next_seq(chan, chan->buffer_seq)); chan->buffer_seq = __next_seq(chan, chan->buffer_seq); l2cap_reassemble_sdu(chan, skb, control); } else { if (chan->sdu) { kfree_skb(chan->sdu); chan->sdu = NULL; } chan->sdu_last_frag = NULL; chan->sdu_len = 0; if (skb) { BT_DBG("Freeing %p", skb); kfree_skb(skb); } } chan->last_acked_seq = txseq; chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { struct l2cap_ctrl *control = &bt_cb(skb)->l2cap; u16 len; u8 event; __unpack_control(chan, skb); len = skb->len; /* * We can just drop the corrupted I-frame here. * Receiver will miss it and start proper recovery * procedures and ask for retransmission. */ if (l2cap_check_fcs(chan, skb)) goto drop; if (!control->sframe && control->sar == L2CAP_SAR_START) len -= L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) len -= L2CAP_FCS_SIZE; if (len > chan->mps) { l2cap_send_disconn_req(chan, ECONNRESET); goto drop; } if (chan->ops->filter) { if (chan->ops->filter(chan, skb)) goto drop; } if (!control->sframe) { int err; BT_DBG("iframe sar %d, reqseq %d, final %d, txseq %d", control->sar, control->reqseq, control->final, control->txseq); /* Validate F-bit - F=0 always valid, F=1 only * valid in TX WAIT_F */ if (control->final && chan->tx_state != L2CAP_TX_STATE_WAIT_F) goto drop; if (chan->mode != L2CAP_MODE_STREAMING) { event = L2CAP_EV_RECV_IFRAME; err = l2cap_rx(chan, control, skb, event); } else { err = l2cap_stream_rx(chan, control, skb); } if (err) l2cap_send_disconn_req(chan, ECONNRESET); } else { const u8 rx_func_to_event[4] = { L2CAP_EV_RECV_RR, L2CAP_EV_RECV_REJ, L2CAP_EV_RECV_RNR, L2CAP_EV_RECV_SREJ }; /* Only I-frames are expected in streaming mode */ if (chan->mode == L2CAP_MODE_STREAMING) goto drop; BT_DBG("sframe reqseq %d, final %d, poll %d, super %d", control->reqseq, control->final, control->poll, control->super); if (len != 0) { BT_ERR("Trailing bytes: %d in sframe", len); l2cap_send_disconn_req(chan, ECONNRESET); goto drop; } /* Validate F and P bits */ if (control->final && (control->poll || chan->tx_state != L2CAP_TX_STATE_WAIT_F)) goto drop; event = rx_func_to_event[control->super]; if (l2cap_rx(chan, control, skb, event)) l2cap_send_disconn_req(chan, ECONNRESET); } return 0; drop: kfree_skb(skb); return 0; } static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); if (chan->rx_credits >= return_credits) return; return_credits -= chan->rx_credits; BT_DBG("chan %p returning %u credits to sender", chan, return_credits); chan->rx_credits += return_credits; pkt.cid = cpu_to_le16(chan->scid); pkt.credits = cpu_to_le16(return_credits); chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail) { if (chan->rx_avail == rx_avail) return; BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail); chan->rx_avail = rx_avail; if (chan->state == BT_CONNECTED) l2cap_chan_le_send_credits(chan); } static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; BT_DBG("SDU reassemble complete: chan %p skb->len %u", chan, skb->len); /* Wait recv to confirm reception before updating the credits */ err = chan->ops->recv(chan, skb); if (err < 0 && chan->rx_avail != -1) { BT_ERR("Queueing received LE L2CAP data failed"); l2cap_send_disconn_req(chan, ECONNRESET); return err; } /* Update credits whenever an SDU is received */ l2cap_chan_le_send_credits(chan); return err; } static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; if (!chan->rx_credits) { BT_ERR("No credits to receive LE L2CAP data"); l2cap_send_disconn_req(chan, ECONNRESET); return -ENOBUFS; } if (chan->imtu < skb->len) { BT_ERR("Too big LE L2CAP PDU"); return -ENOBUFS; } chan->rx_credits--; BT_DBG("chan %p: rx_credits %u -> %u", chan, chan->rx_credits + 1, chan->rx_credits); /* Update if remote had run out of credits, this should only happens * if the remote is not using the entire MPS. */ if (!chan->rx_credits) l2cap_chan_le_send_credits(chan); err = 0; if (!chan->sdu) { u16 sdu_len; sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); BT_DBG("Start of new SDU. sdu_len %u skb->len %u imtu %u", sdu_len, skb->len, chan->imtu); if (sdu_len > chan->imtu) { BT_ERR("Too big LE L2CAP SDU length received"); err = -EMSGSIZE; goto failed; } if (skb->len > sdu_len) { BT_ERR("Too much LE L2CAP data received"); err = -EINVAL; goto failed; } if (skb->len == sdu_len) return l2cap_ecred_recv(chan, skb); chan->sdu = skb; chan->sdu_len = sdu_len; chan->sdu_last_frag = skb; /* Detect if remote is not able to use the selected MPS */ if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) { u16 mps_len = skb->len + L2CAP_SDULEN_SIZE; /* Adjust the number of credits */ BT_DBG("chan->mps %u -> %u", chan->mps, mps_len); chan->mps = mps_len; l2cap_chan_le_send_credits(chan); } return 0; } BT_DBG("SDU fragment. chan->sdu->len %u skb->len %u chan->sdu_len %u", chan->sdu->len, skb->len, chan->sdu_len); if (chan->sdu->len + skb->len > chan->sdu_len) { BT_ERR("Too much LE L2CAP data received"); err = -EINVAL; goto failed; } append_skb_frag(chan->sdu, skb, &chan->sdu_last_frag); skb = NULL; if (chan->sdu->len == chan->sdu_len) { err = l2cap_ecred_recv(chan, chan->sdu); if (!err) { chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; } } failed: if (err) { kfree_skb(skb); kfree_skb(chan->sdu); chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; } /* We can't return an error here since we took care of the skb * freeing internally. An error return would cause the caller to * do a double-free of the skb. */ return 0; } static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { struct l2cap_chan *chan; chan = l2cap_get_chan_by_scid(conn, cid); if (!chan) { BT_DBG("unknown cid 0x%4.4x", cid); /* Drop packet and return */ kfree_skb(skb); return; } BT_DBG("chan %p, len %d", chan, skb->len); /* If we receive data on a fixed channel before the info req/rsp * procedure is done simply assume that the channel is supported * and mark it as ready. */ if (chan->chan_type == L2CAP_CHAN_FIXED) l2cap_chan_ready(chan); if (chan->state != BT_CONNECTED) goto drop; switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: if (l2cap_ecred_data_rcv(chan, skb) < 0) goto drop; goto done; case L2CAP_MODE_BASIC: /* If socket recv buffers overflows we drop data here * which is *bad* because L2CAP has to be reliable. * But we don't have any other choice. L2CAP doesn't * provide flow control mechanism. */ if (chan->imtu < skb->len) { BT_ERR("Dropping L2CAP data: receive buffer overflow"); goto drop; } if (!chan->ops->recv(chan, skb)) goto done; break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: l2cap_data_rcv(chan, skb); goto done; default: BT_DBG("chan %p: bad mode 0x%2.2x", chan, chan->mode); break; } drop: kfree_skb(skb); done: l2cap_chan_unlock(chan); l2cap_chan_put(chan); } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; if (hcon->type != ACL_LINK) goto free_skb; chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst, ACL_LINK); if (!chan) goto free_skb; BT_DBG("chan %p, len %d", chan, skb->len); l2cap_chan_lock(chan); if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) goto drop; if (chan->imtu < skb->len) goto drop; /* Store remote BD_ADDR and PSM for msg_name */ bacpy(&bt_cb(skb)->l2cap.bdaddr, &hcon->dst); bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } drop: l2cap_chan_unlock(chan); l2cap_chan_put(chan); free_skb: kfree_skb(skb); } static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_hdr *lh = (void *) skb->data; struct hci_conn *hcon = conn->hcon; u16 cid, len; __le16 psm; if (hcon->state != BT_CONNECTED) { BT_DBG("queueing pending rx skb"); skb_queue_tail(&conn->pending_rx, skb); return; } skb_pull(skb, L2CAP_HDR_SIZE); cid = __le16_to_cpu(lh->cid); len = __le16_to_cpu(lh->len); if (len != skb->len) { kfree_skb(skb); return; } /* Since we can't actively block incoming LE connections we must * at least ensure that we ignore incoming data from them. */ if (hcon->type == LE_LINK && hci_bdaddr_list_lookup(&hcon->hdev->reject_list, &hcon->dst, bdaddr_dst_type(hcon))) { kfree_skb(skb); return; } BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; case L2CAP_CID_CONN_LESS: psm = get_unaligned((__le16 *) skb->data); skb_pull(skb, L2CAP_PSMLEN_SIZE); l2cap_conless_channel(conn, psm, skb); break; case L2CAP_CID_LE_SIGNALING: l2cap_le_sig_channel(conn, skb); break; default: l2cap_data_channel(conn, cid, skb); break; } } static void process_pending_rx(struct work_struct *work) { struct l2cap_conn *conn = container_of(work, struct l2cap_conn, pending_rx_work); struct sk_buff *skb; BT_DBG(""); while ((skb = skb_dequeue(&conn->pending_rx))) l2cap_recv_frame(conn, skb); } static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) { struct l2cap_conn *conn = hcon->l2cap_data; struct hci_chan *hchan; if (conn) return conn; hchan = hci_chan_create(hcon); if (!hchan) return NULL; conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) { hci_chan_del(hchan); return NULL; } kref_init(&conn->ref); hcon->l2cap_data = conn; conn->hcon = hci_conn_get(hcon); conn->hchan = hchan; BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); conn->mtu = hcon->mtu; conn->feat_mask = 0; conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS; if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) && (bredr_sc_enabled(hcon->hdev) || hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); mutex_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); INIT_LIST_HEAD(&conn->users); INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout); skb_queue_head_init(&conn->pending_rx); INIT_WORK(&conn->pending_rx_work, process_pending_rx); INIT_DELAYED_WORK(&conn->id_addr_timer, l2cap_conn_update_id_addr); conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM; return conn; } static bool is_valid_psm(u16 psm, u8 dst_type) { if (!psm) return false; if (bdaddr_type_is_le(dst_type)) return (psm <= 0x00ff); /* PSM must be odd and lsb of upper byte must be 0 */ return ((psm & 0x0101) == 0x0001); } struct l2cap_chan_data { struct l2cap_chan *chan; struct pid *pid; int count; }; static void l2cap_chan_by_pid(struct l2cap_chan *chan, void *data) { struct l2cap_chan_data *d = data; struct pid *pid; if (chan == d->chan) return; if (!test_bit(FLAG_DEFER_SETUP, &chan->flags)) return; pid = chan->ops->get_peer_pid(chan); /* Only count deferred channels with the same PID/PSM */ if (d->pid != pid || chan->psm != d->chan->psm || chan->ident || chan->mode != L2CAP_MODE_EXT_FLOWCTL || chan->state != BT_CONNECT) return; d->count++; } int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type, u16 timeout) { struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; int err; BT_DBG("%pMR -> %pMR (type %u) psm 0x%4.4x mode 0x%2.2x", &chan->src, dst, dst_type, __le16_to_cpu(psm), chan->mode); hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) return -EHOSTUNREACH; hci_dev_lock(hdev); if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid && chan->chan_type != L2CAP_CHAN_RAW) { err = -EINVAL; goto done; } if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && !psm) { err = -EINVAL; goto done; } if (chan->chan_type == L2CAP_CHAN_FIXED && !cid) { err = -EINVAL; goto done; } switch (chan->mode) { case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: break; case L2CAP_MODE_EXT_FLOWCTL: if (!enable_ecred) { err = -EOPNOTSUPP; goto done; } break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) break; fallthrough; default: err = -EOPNOTSUPP; goto done; } switch (chan->state) { case BT_CONNECT: case BT_CONNECT2: case BT_CONFIG: /* Already connecting */ err = 0; goto done; case BT_CONNECTED: /* Already connected */ err = -EISCONN; goto done; case BT_OPEN: case BT_BOUND: /* Can connect */ break; default: err = -EBADFD; goto done; } /* Set destination address and psm */ bacpy(&chan->dst, dst); chan->dst_type = dst_type; chan->psm = psm; chan->dcid = cid; if (bdaddr_type_is_le(dst_type)) { /* Convert from L2CAP channel address type to HCI address type */ if (dst_type == BDADDR_LE_PUBLIC) dst_type = ADDR_LE_DEV_PUBLIC; else dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hcon = hci_connect_le(hdev, dst, dst_type, false, chan->sec_level, timeout, HCI_ROLE_SLAVE, 0, 0); else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, timeout, CONN_REASON_L2CAP_CHAN); } else { u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type, CONN_REASON_L2CAP_CHAN, timeout); } if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto done; } conn = l2cap_conn_add(hcon); if (!conn) { hci_conn_drop(hcon); err = -ENOMEM; goto done; } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) { struct l2cap_chan_data data; data.chan = chan; data.pid = chan->ops->get_peer_pid(chan); data.count = 1; l2cap_chan_list(conn, l2cap_chan_by_pid, &data); /* Check if there isn't too many channels being connected */ if (data.count > L2CAP_ECRED_CONN_SCID_MAX) { hci_conn_drop(hcon); err = -EPROTO; goto done; } } mutex_lock(&conn->chan_lock); l2cap_chan_lock(chan); if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { hci_conn_drop(hcon); err = -EBUSY; goto chan_unlock; } /* Update source addr of the socket */ bacpy(&chan->src, &hcon->src); chan->src_type = bdaddr_src_type(hcon); __l2cap_chan_add(conn, chan); /* l2cap_chan_add takes its own ref so we can drop this one */ hci_conn_drop(hcon); l2cap_state_change(chan, BT_CONNECT); __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); /* Release chan->sport so that it can be reused by other * sockets (as it's only used for listening sockets). */ write_lock(&chan_list_lock); chan->sport = 0; write_unlock(&chan_list_lock); if (hcon->state == BT_CONNECTED) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { __clear_chan_timer(chan); if (l2cap_chan_check_security(chan, true)) l2cap_state_change(chan, BT_CONNECTED); } else l2cap_do_start(chan); } err = 0; chan_unlock: l2cap_chan_unlock(chan); mutex_unlock(&conn->chan_lock); done: hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } EXPORT_SYMBOL_GPL(l2cap_chan_connect); static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1); pdu->mtu = cpu_to_le16(chan->imtu); pdu->mps = cpu_to_le16(chan->mps); pdu->scid[0] = cpu_to_le16(chan->scid); chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, sizeof(pdu), &pdu); } int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) { if (chan->imtu > mtu) return -EINVAL; BT_DBG("chan %p mtu 0x%4.4x", chan, mtu); chan->imtu = mtu; l2cap_ecred_reconfigure(chan); return 0; } /* ---- L2CAP interface with lower layer (HCI) ---- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) { int exact = 0, lm1 = 0, lm2 = 0; struct l2cap_chan *c; BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr); /* Find listening sockets and check their link_mode */ read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { if (c->state != BT_LISTEN) continue; if (!bacmp(&c->src, &hdev->bdaddr)) { lm1 |= HCI_LM_ACCEPT; if (test_bit(FLAG_ROLE_SWITCH, &c->flags)) lm1 |= HCI_LM_MASTER; exact++; } else if (!bacmp(&c->src, BDADDR_ANY)) { lm2 |= HCI_LM_ACCEPT; if (test_bit(FLAG_ROLE_SWITCH, &c->flags)) lm2 |= HCI_LM_MASTER; } } read_unlock(&chan_list_lock); return exact ? lm1 : lm2; } /* Find the next fixed channel in BT_LISTEN state, continue iteration * from an existing channel in the list or from the beginning of the * global list (by passing NULL as first parameter). */ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, struct hci_conn *hcon) { u8 src_type = bdaddr_src_type(hcon); read_lock(&chan_list_lock); if (c) c = list_next_entry(c, global_l); else c = list_entry(chan_list.next, typeof(*c), global_l); list_for_each_entry_from(c, &chan_list, global_l) { if (c->chan_type != L2CAP_CHAN_FIXED) continue; if (c->state != BT_LISTEN) continue; if (bacmp(&c->src, &hcon->src) && bacmp(&c->src, BDADDR_ANY)) continue; if (src_type != c->src_type) continue; c = l2cap_chan_hold_unless_zero(c); read_unlock(&chan_list_lock); return c; } read_unlock(&chan_list_lock); return NULL; } static bool l2cap_match(struct hci_conn *hcon) { return hcon->type == ACL_LINK || hcon->type == LE_LINK; } static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; struct l2cap_conn *conn; struct l2cap_chan *pchan; u8 dst_type; BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { l2cap_conn_del(hcon, bt_to_errno(status)); return; } conn = l2cap_conn_add(hcon); if (!conn) return; dst_type = bdaddr_dst_type(hcon); /* If device is blocked, do not create channels for it */ if (hci_bdaddr_list_lookup(&hdev->reject_list, &hcon->dst, dst_type)) return; /* Find fixed channels and notify them of the new connection. We * use multiple individual lookups, continuing each time where * we left off, because the list lock would prevent calling the * potentially sleeping l2cap_chan_lock() function. */ pchan = l2cap_global_fixed_chan(NULL, hcon); while (pchan) { struct l2cap_chan *chan, *next; /* Client fixed channels should override server ones */ if (__l2cap_get_chan_by_dcid(conn, pchan->scid)) goto next; l2cap_chan_lock(pchan); chan = pchan->ops->new_connection(pchan); if (chan) { bacpy(&chan->src, &hcon->src); bacpy(&chan->dst, &hcon->dst); chan->src_type = bdaddr_src_type(hcon); chan->dst_type = dst_type; __l2cap_chan_add(conn, chan); } l2cap_chan_unlock(pchan); next: next = l2cap_global_fixed_chan(pchan, hcon); l2cap_chan_put(pchan); pchan = next; } l2cap_conn_ready(conn); } int l2cap_disconn_ind(struct hci_conn *hcon) { struct l2cap_conn *conn = hcon->l2cap_data; BT_DBG("hcon %p", hcon); if (!conn) return HCI_ERROR_REMOTE_USER_TERM; return conn->disc_reason; } static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); } static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) return; if (encrypt == 0x00) { if (chan->sec_level == BT_SECURITY_MEDIUM) { __set_chan_timer(chan, L2CAP_ENC_TIMEOUT); } else if (chan->sec_level == BT_SECURITY_HIGH || chan->sec_level == BT_SECURITY_FIPS) l2cap_chan_close(chan, ECONNREFUSED); } else { if (chan->sec_level == BT_SECURITY_MEDIUM) __clear_chan_timer(chan); } } static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan; if (!conn) return; BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); mutex_lock(&conn->chan_lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid, state_to_string(chan->state)); if (!status && encrypt) chan->sec_level = hcon->sec_level; if (!__l2cap_no_conn_pending(chan)) { l2cap_chan_unlock(chan); continue; } if (!status && (chan->state == BT_CONNECTED || chan->state == BT_CONFIG)) { chan->ops->resume(chan); l2cap_check_encryption(chan, encrypt); l2cap_chan_unlock(chan); continue; } if (chan->state == BT_CONNECT) { if (!status && l2cap_check_enc_key_size(hcon)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } else if (chan->state == BT_CONNECT2 && !(chan->mode == L2CAP_MODE_EXT_FLOWCTL || chan->mode == L2CAP_MODE_LE_FLOWCTL)) { struct l2cap_conn_rsp rsp; __u16 res, stat; if (!status && l2cap_check_enc_key_size(hcon)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { l2cap_state_change(chan, BT_CONFIG); res = L2CAP_CR_SUCCESS; stat = L2CAP_CS_NO_INFO; } } else { l2cap_state_change(chan, BT_DISCONN); __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); res = L2CAP_CR_SEC_BLOCK; stat = L2CAP_CS_NO_INFO; } rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(res); rsp.status = cpu_to_le16(stat); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); if (!test_bit(CONF_REQ_SENT, &chan->conf_state) && res == L2CAP_CR_SUCCESS) { char buf[128]; set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } } l2cap_chan_unlock(chan); } mutex_unlock(&conn->chan_lock); } /* Append fragment into frame respecting the maximum len of rx_skb */ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, u16 len) { if (!conn->rx_skb) { /* Allocate skb for the complete frame (with header) */ conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL); if (!conn->rx_skb) return -ENOMEM; /* Init rx_len */ conn->rx_len = len; } /* Copy as much as the rx_skb can hold */ len = min_t(u16, len, skb->len); skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, len), len); skb_pull(skb, len); conn->rx_len -= len; return len; } static int l2cap_recv_len(struct l2cap_conn *conn, struct sk_buff *skb) { struct sk_buff *rx_skb; int len; /* Append just enough to complete the header */ len = l2cap_recv_frag(conn, skb, L2CAP_LEN_SIZE - conn->rx_skb->len); /* If header could not be read just continue */ if (len < 0 || conn->rx_skb->len < L2CAP_LEN_SIZE) return len; rx_skb = conn->rx_skb; len = get_unaligned_le16(rx_skb->data); /* Check if rx_skb has enough space to received all fragments */ if (len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE) <= skb_tailroom(rx_skb)) { /* Update expected len */ conn->rx_len = len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE); return L2CAP_LEN_SIZE; } /* Reset conn->rx_skb since it will need to be reallocated in order to * fit all fragments. */ conn->rx_skb = NULL; /* Reallocates rx_skb using the exact expected length */ len = l2cap_recv_frag(conn, rx_skb, len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE)); kfree_skb(rx_skb); return len; } static void l2cap_recv_reset(struct l2cap_conn *conn) { kfree_skb(conn->rx_skb); conn->rx_skb = NULL; conn->rx_len = 0; } void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct l2cap_conn *conn = hcon->l2cap_data; int len; if (!conn) conn = l2cap_conn_add(hcon); if (!conn) goto drop; BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); switch (flags) { case ACL_START: case ACL_START_NO_FLUSH: case ACL_COMPLETE: if (conn->rx_skb) { BT_ERR("Unexpected start frame (len %d)", skb->len); l2cap_recv_reset(conn); l2cap_conn_unreliable(conn, ECOMM); } /* Start fragment may not contain the L2CAP length so just * copy the initial byte when that happens and use conn->mtu as * expected length. */ if (skb->len < L2CAP_LEN_SIZE) { l2cap_recv_frag(conn, skb, conn->mtu); break; } len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE; if (len == skb->len) { /* Complete frame received */ l2cap_recv_frame(conn, skb); return; } BT_DBG("Start: total len %d, frag len %u", len, skb->len); if (skb->len > len) { BT_ERR("Frame is too long (len %u, expected len %d)", skb->len, len); l2cap_conn_unreliable(conn, ECOMM); goto drop; } /* Append fragment into frame (with header) */ if (l2cap_recv_frag(conn, skb, len) < 0) goto drop; break; case ACL_CONT: BT_DBG("Cont: frag len %u (expecting %u)", skb->len, conn->rx_len); if (!conn->rx_skb) { BT_ERR("Unexpected continuation frame (len %d)", skb->len); l2cap_conn_unreliable(conn, ECOMM); goto drop; } /* Complete the L2CAP length if it has not been read */ if (conn->rx_skb->len < L2CAP_LEN_SIZE) { if (l2cap_recv_len(conn, skb) < 0) { l2cap_conn_unreliable(conn, ECOMM); goto drop; } /* Header still could not be read just continue */ if (conn->rx_skb->len < L2CAP_LEN_SIZE) break; } if (skb->len > conn->rx_len) { BT_ERR("Fragment is too long (len %u, expected %u)", skb->len, conn->rx_len); l2cap_recv_reset(conn); l2cap_conn_unreliable(conn, ECOMM); goto drop; } /* Append fragment into frame (with header) */ l2cap_recv_frag(conn, skb, skb->len); if (!conn->rx_len) { /* Complete frame received. l2cap_recv_frame * takes ownership of the skb so set the global * rx_skb pointer to NULL first. */ struct sk_buff *rx_skb = conn->rx_skb; conn->rx_skb = NULL; l2cap_recv_frame(conn, rx_skb); } break; } drop: kfree_skb(skb); } static struct hci_cb l2cap_cb = { .name = "L2CAP", .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, }; static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct l2cap_chan *c; read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { seq_printf(f, "%pMR (%u) %pMR (%u) %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", &c->src, c->src_type, &c->dst, c->dst_type, c->state, __le16_to_cpu(c->psm), c->scid, c->dcid, c->imtu, c->omtu, c->sec_level, c->mode); } read_unlock(&chan_list_lock); return 0; } DEFINE_SHOW_ATTRIBUTE(l2cap_debugfs); static struct dentry *l2cap_debugfs; int __init l2cap_init(void) { int err; err = l2cap_init_sockets(); if (err < 0) return err; hci_register_cb(&l2cap_cb); if (IS_ERR_OR_NULL(bt_debugfs)) return 0; l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); return 0; } void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); hci_unregister_cb(&l2cap_cb); l2cap_cleanup_sockets(); } module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); module_param(enable_ecred, bool, 0644); MODULE_PARM_DESC(enable_ecred, "Enable enhanced credit flow control mode");
35 18 27 6 11 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 #ifndef LLC_PDU_H #define LLC_PDU_H /* * Copyright (c) 1997 by Procom Technology,Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/if_ether.h> /* Lengths of frame formats */ #define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ #define LLC_PDU_LEN_S 4 #define LLC_PDU_LEN_U 3 /* header and 1 control byte */ /* header and 1 control byte and XID info */ #define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ #define LLC_MGMT_INDIV 0x02 /* station LLC mgmt indiv addr */ #define LLC_MGMT_GRP 0x03 /* station LLC mgmt group addr */ #define LLC_RDE_SAP 0xA6 /* route ... */ /* SAP field bit masks */ #define LLC_ISO_RESERVED_SAP 0x02 #define LLC_SAP_GROUP_DSAP 0x01 #define LLC_SAP_RESP_SSAP 0x01 /* Group/individual DSAP indicator is DSAP field */ #define LLC_PDU_GROUP_DSAP_MASK 0x01 #define LLC_PDU_IS_GROUP_DSAP(pdu) \ ((pdu->dsap & LLC_PDU_GROUP_DSAP_MASK) ? 0 : 1) #define LLC_PDU_IS_INDIV_DSAP(pdu) \ (!(pdu->dsap & LLC_PDU_GROUP_DSAP_MASK) ? 0 : 1) /* Command/response PDU indicator in SSAP field */ #define LLC_PDU_CMD_RSP_MASK 0x01 #define LLC_PDU_CMD 0 #define LLC_PDU_RSP 1 #define LLC_PDU_IS_CMD(pdu) ((pdu->ssap & LLC_PDU_RSP) ? 0 : 1) #define LLC_PDU_IS_RSP(pdu) ((pdu->ssap & LLC_PDU_RSP) ? 1 : 0) /* Get PDU type from 2 lowest-order bits of control field first byte */ #define LLC_PDU_TYPE_I_MASK 0x01 /* 16-bit control field */ #define LLC_PDU_TYPE_S_MASK 0x03 #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 #define LLC_PDU_TYPE_I 0 /* first bit */ #define LLC_PDU_TYPE_S 1 /* first two bits */ #define LLC_PDU_TYPE_U 3 /* first two bits */ #define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) #define LLC_PDU_TYPE_IS_U(pdu) \ (((pdu->ctrl_1 & LLC_PDU_TYPE_U_MASK) == LLC_PDU_TYPE_U) ? 1 : 0) #define LLC_PDU_TYPE_IS_S(pdu) \ (((pdu->ctrl_1 & LLC_PDU_TYPE_S_MASK) == LLC_PDU_TYPE_S) ? 1 : 0) /* U-format PDU control field masks */ #define LLC_U_PF_BIT_MASK 0x10 /* P/F bit mask */ #define LLC_U_PF_IS_1(pdu) ((pdu->ctrl_1 & LLC_U_PF_BIT_MASK) ? 1 : 0) #define LLC_U_PF_IS_0(pdu) ((!(pdu->ctrl_1 & LLC_U_PF_BIT_MASK)) ? 1 : 0) #define LLC_U_PDU_CMD_MASK 0xEC /* cmd/rsp mask */ #define LLC_U_PDU_CMD(pdu) (pdu->ctrl_1 & LLC_U_PDU_CMD_MASK) #define LLC_U_PDU_RSP(pdu) (pdu->ctrl_1 & LLC_U_PDU_CMD_MASK) #define LLC_1_PDU_CMD_UI 0x00 /* Type 1 cmds/rsps */ #define LLC_1_PDU_CMD_XID 0xAC #define LLC_1_PDU_CMD_TEST 0xE0 #define LLC_2_PDU_CMD_SABME 0x6C /* Type 2 cmds/rsps */ #define LLC_2_PDU_CMD_DISC 0x40 #define LLC_2_PDU_RSP_UA 0x60 #define LLC_2_PDU_RSP_DM 0x0C #define LLC_2_PDU_RSP_FRMR 0x84 /* Type 1 operations */ /* XID information field bit masks */ /* LLC format identifier (byte 1) */ #define LLC_XID_FMT_ID 0x81 /* first byte must be this */ /* LLC types/classes identifier (byte 2) */ #define LLC_XID_CLASS_ZEROS_MASK 0xE0 /* these must be zeros */ #define LLC_XID_CLASS_MASK 0x1F /* AND with byte to get below */ #define LLC_XID_NULL_CLASS_1 0x01 /* if NULL LSAP...use these */ #define LLC_XID_NULL_CLASS_2 0x03 #define LLC_XID_NULL_CLASS_3 0x05 #define LLC_XID_NULL_CLASS_4 0x07 #define LLC_XID_NNULL_TYPE_1 0x01 /* if non-NULL LSAP...use these */ #define LLC_XID_NNULL_TYPE_2 0x02 #define LLC_XID_NNULL_TYPE_3 0x04 #define LLC_XID_NNULL_TYPE_1_2 0x03 #define LLC_XID_NNULL_TYPE_1_3 0x05 #define LLC_XID_NNULL_TYPE_2_3 0x06 #define LLC_XID_NNULL_ALL 0x07 /* Sender Receive Window (byte 3) */ #define LLC_XID_RW_MASK 0xFE /* AND with value to get below */ #define LLC_XID_MIN_RW 0x02 /* lowest-order bit always zero */ /* Type 2 operations */ #define LLC_2_SEQ_NBR_MODULO ((u8) 128) /* I-PDU masks ('ctrl' is I-PDU control word) */ #define LLC_I_GET_NS(pdu) (u8)((pdu->ctrl_1 & 0xFE) >> 1) #define LLC_I_GET_NR(pdu) (u8)((pdu->ctrl_2 & 0xFE) >> 1) #define LLC_I_PF_BIT_MASK 0x01 #define LLC_I_PF_IS_0(pdu) ((!(pdu->ctrl_2 & LLC_I_PF_BIT_MASK)) ? 1 : 0) #define LLC_I_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_I_PF_BIT_MASK) ? 1 : 0) /* S-PDU supervisory commands and responses */ #define LLC_S_PDU_CMD_MASK 0x0C #define LLC_S_PDU_CMD(pdu) (pdu->ctrl_1 & LLC_S_PDU_CMD_MASK) #define LLC_S_PDU_RSP(pdu) (pdu->ctrl_1 & LLC_S_PDU_CMD_MASK) #define LLC_2_PDU_CMD_RR 0x00 /* rx ready cmd */ #define LLC_2_PDU_RSP_RR 0x00 /* rx ready rsp */ #define LLC_2_PDU_CMD_REJ 0x08 /* reject PDU cmd */ #define LLC_2_PDU_RSP_REJ 0x08 /* reject PDU rsp */ #define LLC_2_PDU_CMD_RNR 0x04 /* rx not ready cmd */ #define LLC_2_PDU_RSP_RNR 0x04 /* rx not ready rsp */ #define LLC_S_PF_BIT_MASK 0x01 #define LLC_S_PF_IS_0(pdu) ((!(pdu->ctrl_2 & LLC_S_PF_BIT_MASK)) ? 1 : 0) #define LLC_S_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_S_PF_BIT_MASK) ? 1 : 0) #define PDU_SUPV_GET_Nr(pdu) ((pdu->ctrl_2 & 0xFE) >> 1) #define PDU_GET_NEXT_Vr(sn) (((sn) + 1) & ~LLC_2_SEQ_NBR_MODULO) /* FRMR information field macros */ #define FRMR_INFO_LENGTH 5 /* 5 bytes of information */ /* * info is pointer to FRMR info field structure; 'rej_ctrl' is byte pointer * (if U-PDU) or word pointer to rejected PDU control field */ #define FRMR_INFO_SET_REJ_CNTRL(info,rej_ctrl) \ info->rej_pdu_ctrl = ((*((u8 *) rej_ctrl) & \ LLC_PDU_TYPE_U) != LLC_PDU_TYPE_U ? \ (u16)*((u16 *) rej_ctrl) : \ (((u16) *((u8 *) rej_ctrl)) & 0x00FF)) /* * Info is pointer to FRMR info field structure; 'vs' is a byte containing * send state variable value in low-order 7 bits (insure the lowest-order * bit remains zero (0)) */ #define FRMR_INFO_SET_Vs(info,vs) (info->curr_ssv = (((u8) vs) << 1)) #define FRMR_INFO_SET_Vr(info,vr) (info->curr_rsv = (((u8) vr) << 1)) /* * Info is pointer to FRMR info field structure; 'cr' is a byte containing * the C/R bit value in the low-order bit */ #define FRMR_INFO_SET_C_R_BIT(info, cr) (info->curr_rsv |= (((u8) cr) & 0x01)) /* * In the remaining five macros, 'info' is pointer to FRMR info field * structure; 'ind' is a byte containing the bit value to set in the * lowest-order bit) */ #define FRMR_INFO_SET_INVALID_PDU_CTRL_IND(info, ind) \ (info->ind_bits = ((info->ind_bits & 0xFE) | (((u8) ind) & 0x01))) #define FRMR_INFO_SET_INVALID_PDU_INFO_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xFD) | (((u8) ind) & 0x02))) #define FRMR_INFO_SET_PDU_INFO_2LONG_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xFB) | (((u8) ind) & 0x04))) #define FRMR_INFO_SET_PDU_INVALID_Nr_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xF7) | (((u8) ind) & 0x08))) #define FRMR_INFO_SET_PDU_INVALID_Ns_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xEF) | (((u8) ind) & 0x10))) /* Sequence-numbered PDU format (4 bytes in length) */ struct llc_pdu_sn { u8 dsap; u8 ssap; u8 ctrl_1; u8 ctrl_2; } __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { return (struct llc_pdu_sn *)skb_network_header(skb); } /* Un-numbered PDU format (3 bytes in length) */ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; } __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { return (struct llc_pdu_un *)skb_network_header(skb); } /** * llc_pdu_header_init - initializes pdu header * @skb: input skb that header must be set into it. * @type: type of PDU (U, I or S). * @ssap: source sap. * @dsap: destination sap. * @cr: command/response bit (0 or 1). * * This function sets DSAP, SSAP and command/Response bit in LLC header. */ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; switch (type) { case LLC_PDU_TYPE_U: hlen = 3; break; case LLC_PDU_TYPE_U_XID: hlen = 6; break; } skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); pdu->dsap = dsap; pdu->ssap = ssap; pdu->ssap |= cr; } /** * llc_pdu_decode_sa - extracts, source address (MAC) of input frame * @skb: input skb that source address must be extracted from it. * @sa: pointer to source address (6 byte array). * * This function extracts source address(MAC) of input frame. */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** * llc_pdu_decode_da - extracts dest address of input frame * @skb: input skb that destination address must be extracted from it * @da: pointer to destination address (6 byte array). * * This function extracts destination address(MAC) of input frame. */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** * llc_pdu_decode_ssap - extracts source SAP of input frame * @skb: input skb that source SAP must be extracted from it. * @ssap: source SAP (output argument). * * This function extracts source SAP of input frame. Right bit of SSAP is * command/response bit. */ static inline void llc_pdu_decode_ssap(struct sk_buff *skb, u8 *ssap) { *ssap = llc_pdu_un_hdr(skb)->ssap & 0xFE; } /** * llc_pdu_decode_dsap - extracts dest SAP of input frame * @skb: input skb that destination SAP must be extracted from it. * @dsap: destination SAP (output argument). * * This function extracts destination SAP of input frame. right bit of * DSAP designates individual/group SAP. */ static inline void llc_pdu_decode_dsap(struct sk_buff *skb, u8 *dsap) { *dsap = llc_pdu_un_hdr(skb)->dsap & 0xFE; } /** * llc_pdu_init_as_ui_cmd - sets LLC header as UI PDU * @skb: input skb that header must be set into it. * * This function sets third byte of LLC header as a UI PDU. */ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_UI; } /** * llc_pdu_init_as_test_cmd - sets PDU as TEST * @skb: Address of the skb to build * * Sets a PDU as TEST */ static inline void llc_pdu_init_as_test_cmd(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_TEST; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; } /** * llc_pdu_init_as_test_rsp - build TEST response PDU * @skb: Address of the skb to build * @ev_skb: The received TEST command PDU frame * * Builds a pdu frame as a TEST response. */ static inline void llc_pdu_init_as_test_rsp(struct sk_buff *skb, struct sk_buff *ev_skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_TEST; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; if (ev_skb->protocol == htons(ETH_P_802_2)) { struct llc_pdu_un *ev_pdu = llc_pdu_un_hdr(ev_skb); int dsize; dsize = ntohs(eth_hdr(ev_skb)->h_proto) - 3; memcpy(((u8 *)pdu) + 3, ((u8 *)ev_pdu) + 3, dsize); skb_put(skb, dsize); } } /* LLC Type 1 XID command/response information fields format */ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ } __packed; /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID * @skb: input skb that header must be set into it. * @svcs_supported: The class of the LLC (I or II) * @rx_window: The size of the receive window of the LLC * * This function sets third,fourth,fifth and sixth bytes of LLC header as * a XID PDU. */ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, u8 svcs_supported, u8 rx_window) { struct llc_xid_info *xid_info; struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_XID; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; xid_info = (struct llc_xid_info *)(((u8 *)&pdu->ctrl_1) + 1); xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ /* no need to push/put since llc_pdu_header_init() has already * pushed 3 + 3 bytes */ } /** * llc_pdu_init_as_xid_rsp - builds XID response PDU * @skb: Address of the skb to build * @svcs_supported: The class of the LLC (I or II) * @rx_window: The size of the receive window of the LLC * * Builds a pdu frame as an XID response. */ static inline void llc_pdu_init_as_xid_rsp(struct sk_buff *skb, u8 svcs_supported, u8 rx_window) { struct llc_xid_info *xid_info; struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_XID; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; xid_info = (struct llc_xid_info *)(((u8 *)&pdu->ctrl_1) + 1); xid_info->fmt_id = LLC_XID_FMT_ID; xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; skb_put(skb, sizeof(struct llc_xid_info)); } /* LLC Type 2 FRMR response information field format */ struct llc_frmr_info { u16 rej_pdu_ctrl; /* bits 1-8 if U-PDU */ u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ } __packed; void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit); void llc_pdu_init_as_disc_cmd(struct sk_buff *skb, u8 p_bit); void llc_pdu_init_as_i_cmd(struct sk_buff *skb, u8 p_bit, u8 ns, u8 nr); void llc_pdu_init_as_rej_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_rnr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_rr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_sabme_cmd(struct sk_buff *skb, u8 p_bit); void llc_pdu_init_as_dm_rsp(struct sk_buff *skb, u8 f_bit); void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu, u8 f_bit, u8 vs, u8 vr, u8 vzyxw); void llc_pdu_init_as_rr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_rej_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_rnr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_ua_rsp(struct sk_buff *skb, u8 f_bit); #endif /* LLC_PDU_H */
33 99 72 33 2 2 64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 /* SPDX-License-Identifier: GPL-2.0-only */ /* * pcm_local.h - a local header file for snd-pcm module. * * Copyright (c) Takashi Sakamoto <o-takashi@sakamocchi.jp> */ #ifndef __SOUND_CORE_PCM_LOCAL_H #define __SOUND_CORE_PCM_LOCAL_H extern const struct snd_pcm_hw_constraint_list snd_pcm_known_rates; void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c); void snd_interval_div(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c); void snd_interval_muldivk(const struct snd_interval *a, const struct snd_interval *b, unsigned int k, struct snd_interval *c); void snd_interval_mulkdiv(const struct snd_interval *a, unsigned int k, const struct snd_interval *b, struct snd_interval *c); int snd_pcm_hw_constraint_mask(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, u_int32_t mask); int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t appl_ptr); int snd_pcm_update_state(struct snd_pcm_substream *substream, struct snd_pcm_runtime *runtime); int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream); void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_uframes_t new_hw_ptr); static inline snd_pcm_uframes_t snd_pcm_avail(struct snd_pcm_substream *substream) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) return snd_pcm_playback_avail(substream->runtime); else return snd_pcm_capture_avail(substream->runtime); } static inline snd_pcm_uframes_t snd_pcm_hw_avail(struct snd_pcm_substream *substream) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) return snd_pcm_playback_hw_avail(substream->runtime); else return snd_pcm_capture_hw_avail(substream->runtime); } #ifdef CONFIG_SND_PCM_TIMER void snd_pcm_timer_resolution_change(struct snd_pcm_substream *substream); void snd_pcm_timer_init(struct snd_pcm_substream *substream); void snd_pcm_timer_done(struct snd_pcm_substream *substream); #else static inline void snd_pcm_timer_resolution_change(struct snd_pcm_substream *substream) {} static inline void snd_pcm_timer_init(struct snd_pcm_substream *substream) {} static inline void snd_pcm_timer_done(struct snd_pcm_substream *substream) {} #endif void __snd_pcm_xrun(struct snd_pcm_substream *substream); void snd_pcm_group_init(struct snd_pcm_group *group); void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq); #define PCM_RUNTIME_CHECK(sub) snd_BUG_ON(!(sub) || !(sub)->runtime) /* loop over all PCM substreams */ #define for_each_pcm_substream(pcm, str, subs) \ for ((str) = 0; (str) < 2; (str)++) \ for ((subs) = (pcm)->streams[str].substream; (subs); \ (subs) = (subs)->next) static inline void snd_pcm_dma_buffer_sync(struct snd_pcm_substream *substream, enum snd_dma_sync_mode mode) { if (substream->runtime->info & SNDRV_PCM_INFO_EXPLICIT_SYNC) snd_dma_buffer_sync(snd_pcm_get_dma_buf(substream), mode); } #endif /* __SOUND_CORE_PCM_LOCAL_H */
102 101 102 102 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VIRTIO_RING_H #define _LINUX_VIRTIO_RING_H #include <asm/barrier.h> #include <linux/irqreturn.h> #include <uapi/linux/virtio_ring.h> /* * Barriers in virtio are tricky. Non-SMP virtio guests can't assume * they're not on an SMP host system, so they need to assume real * barriers. Non-SMP virtio hosts could skip the barriers, but does * anyone care? * * For virtio_pci on SMP, we don't need to order with respect to MMIO * accesses through relaxed memory I/O windows, so virt_mb() et al are * sufficient. * * For using virtio to talk to real devices (eg. other heterogeneous * CPUs) we do need real barriers. In theory, we could be using both * kinds of virtio, so it's a runtime decision, and the branch is * actually quite cheap. */ static inline void virtio_mb(bool weak_barriers) { if (weak_barriers) virt_mb(); else mb(); } static inline void virtio_rmb(bool weak_barriers) { if (weak_barriers) virt_rmb(); else dma_rmb(); } static inline void virtio_wmb(bool weak_barriers) { if (weak_barriers) virt_wmb(); else dma_wmb(); } #define virtio_store_mb(weak_barriers, p, v) \ do { \ if (weak_barriers) { \ virt_store_mb(*p, v); \ } else { \ WRITE_ONCE(*p, v); \ mb(); \ } \ } while (0) \ struct virtio_device; struct virtqueue; struct device; /* * Creates a virtqueue and allocates the descriptor ring. If * may_reduce_num is set, then this may allocate a smaller ring than * expected. The caller should query virtqueue_get_vring_size to learn * the actual size of the ring. */ struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool ctx, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); /* * Creates a virtqueue and allocates the descriptor ring with per * virtqueue DMA device. */ struct virtqueue *vring_create_virtqueue_dma(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool ctx, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name, struct device *dma_dev); /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); /* * Destroys a virtqueue. If created with vring_create_virtqueue, this * also frees the ring. */ void vring_del_virtqueue(struct virtqueue *vq); /* Filter out transport-specific feature bits. */ void vring_transport_features(struct virtio_device *vdev); irqreturn_t vring_interrupt(int irq, void *_vq); u32 vring_notification_data(struct virtqueue *_vq); #endif /* _LINUX_VIRTIO_RING_H */
22 22 22 22 72 71 68 67 88 88 88 82 84 88 63 87 15 35 22 22 22 22 22 22 22 22 22 22 22 53 4 12 7 437 438 435 437 437 440 440 439 440 438 440 439 13 13 434 440 440 162 376 377 440 39 39 26 30 30 30 30 1 29 1 28 27 1 27 1 27 26 25 26 25 24 24 23 2 22 1 21 4 1 79 12 12 11 3 77 14 13 12 3 74 74 3 72 74 3 3 74 3 3 74 2 2 74 2 73 2 74 4 73 73 73 73 4 1 3 1 70 1 70 1 69 4 4 3 2 65 66 2 66 6 2 65 6 3 62 80 12 20 19 7 5 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 53 38 1 53 1 52 1 51 19 20 38 29 29 28 26 29 27 28 28 27 29 29 3 3 3 29 2 29 3 29 1 28 27 29 23 38 30 1 38 2 36 1 35 30 30 16 15 13 15 1 15 1 1 1 15 1 1 1 1 15 15 15 15 21 22 35 43 43 43 43 43 43 43 3 2 43 2 2 2 42 2 2 1 43 37 43 1 43 2 43 3 42 2 43 1 43 1 43 3 43 2 43 43 2 43 43 4 4 4 4 4 4 4 4 4 1 5 4 3 2 1 10 6 5 3 2 2 2 2 2 2 2 2 2 10 39 39 38 39 695 692 35 694 43 1 262 264 264 1167 1168 677 676 17 17 17 247 247 1167 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 // SPDX-License-Identifier: GPL-2.0-only /* * net/core/fib_rules.c Generic Routing Rules * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/inet_dscp.h> #include <net/sock.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <linux/indirect_call_wrapper.h> #if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES) #ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) #else #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) #endif #elif defined(CONFIG_IP_MULTIPLE_TABLES) #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) #else #define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__) #endif static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), KUIDT_INIT(~0), }; bool fib_rule_matchall(const struct fib_rule *rule) { if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) return false; if (fib_rule_port_range_set(&rule->sport_range)) return false; if (fib_rule_port_range_set(&rule->dport_range)) return false; return true; } EXPORT_SYMBOL_GPL(fib_rule_matchall); int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table) { struct fib_rule *r; r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (r == NULL) return -ENOMEM; refcount_set(&r->refcnt, 1); r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; /* The lock is not required here, the list in unreachable * at the moment this function is called */ list_add_tail(&r->list, &ops->rules_list); return 0; } EXPORT_SYMBOL(fib_default_rule_add); static u32 fib_default_rule_pref(struct fib_rules_ops *ops) { struct list_head *pos; struct fib_rule *rule; if (!list_empty(&ops->rules_list)) { pos = ops->rules_list.next; if (pos->next != &ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; } } return 0; } static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); static struct fib_rules_ops *lookup_rules_ops(const struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; rcu_read_unlock(); return ops; } } rcu_read_unlock(); return NULL; } static void rules_ops_put(struct fib_rules_ops *ops) { if (ops) module_put(ops->owner); } static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) ops->flush_cache(ops); } static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; struct net *net; net = ops->fro_net; if (ops->rule_size < sizeof(struct fib_rule)) return -EINVAL; if (ops->match == NULL || ops->configure == NULL || ops->compare == NULL || ops->fill == NULL || ops->action == NULL) return -EINVAL; spin_lock(&net->rules_mod_lock); list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: spin_unlock(&net->rules_mod_lock); return err; } struct fib_rules_ops * fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ops->rules_list); ops->fro_net = net; err = __fib_rules_register(ops); if (err) { kfree(ops); ops = ERR_PTR(err); } return ops; } EXPORT_SYMBOL_GPL(fib_rules_register); static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); if (ops->delete) ops->delete(rule); fib_rule_put(rule); } } void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; spin_lock(&net->rules_mod_lock); list_del_rcu(&ops->list); spin_unlock(&net->rules_mod_lock); fib_rules_cleanup_ops(ops); kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); static int uid_range_set(struct fib_kuid_range *range) { return uid_valid(range->start) && uid_valid(range->end); } static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) { struct fib_rule_uid_range *in; struct fib_kuid_range out; in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); out.start = make_kuid(current_user_ns(), in->start); out.end = make_kuid(current_user_ns(), in->end); return out; } static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) { struct fib_rule_uid_range out = { from_kuid_munged(current_user_ns(), range->start), from_kuid_munged(current_user_ns(), range->end) }; return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } static int nla_get_port_range(struct nlattr *pattr, struct fib_rule_port_range *port_range) { const struct fib_rule_port_range *pr = nla_data(pattr); if (!fib_rule_port_range_valid(pr)) return -EINVAL; port_range->start = pr->start; port_range->end = pr->end; return 0; } static int nla_put_port_range(struct sk_buff *skb, int attrtype, struct fib_rule_port_range *range) { return nla_put(skb, attrtype, sizeof(*range), range); } static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { int ret = 0; if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; if (uid_lt(fl->flowi_uid, rule->uid_range.start) || uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; ret = INDIRECT_CALL_MT(ops->match, fib6_rule_match, fib4_rule_match, rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; } int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { struct fib_rule *rule; int err; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags, arg)) continue; if (rule->action == FR_ACT_GOTO) { struct fib_rule *target; target = rcu_dereference(rule->ctarget); if (target == NULL) { continue; } else { rule = target; goto jumped; } } else if (rule->action == FR_ACT_NOP) continue; else err = INDIRECT_CALL_MT(ops->action, fib6_rule_action, fib4_rule_action, rule, fl, flags, arg); if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, fib6_rule_suppress, fib4_rule_suppress, rule, flags, arg)) continue; if (err != -EAGAIN) { if ((arg->flags & FIB_LOOKUP_NOREF) || likely(refcount_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } break; } } err = -ESRCH; out: rcu_read_unlock(); return err; } EXPORT_SYMBOL_GPL(fib_rules_lookup); static int call_fib_rule_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_rule *rule, int family, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = family, .info.extack = extack, .rule = rule, }; return call_fib_notifier(nb, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule, struct fib_rules_ops *ops, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = ops->family, .info.extack = extack, .rule = rule, }; ASSERT_RTNL(); /* Paired with READ_ONCE() in fib_rules_seq() */ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); } /* Called with rcu_read_lock() */ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack) { struct fib_rules_ops *ops; struct fib_rule *rule; int err = 0; ops = lookup_rules_ops(net, family); if (!ops) return -EAFNOSUPPORT; list_for_each_entry_rcu(rule, &ops->rules_list, list) { err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD, rule, family, extack); if (err) break; } rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_rules_dump); unsigned int fib_rules_seq_read(const struct net *net, int family) { unsigned int fib_rules_seq; struct fib_rules_ops *ops; ops = lookup_rules_ops(net, family); if (!ops) return 0; /* Paired with WRITE_ONCE() in call_fib_rule_notifiers() */ fib_rules_seq = READ_ONCE(ops->fib_rules_seq); rules_ops_put(ops); return fib_rules_seq; } EXPORT_SYMBOL_GPL(fib_rules_seq_read); static struct fib_rule *rule_find(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule, bool user_priority) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { if (rule->action && r->action != rule->action) continue; if (rule->table && r->table != rule->table) continue; if (user_priority && r->pref != rule->pref) continue; if (rule->iifname[0] && memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; if (rule->oifname[0] && memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; if (rule->mark && r->mark != rule->mark) continue; if (rule->suppress_ifgroup != -1 && r->suppress_ifgroup != rule->suppress_ifgroup) continue; if (rule->suppress_prefixlen != -1 && r->suppress_prefixlen != rule->suppress_prefixlen) continue; if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; if (rule->tun_id && r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; if (uid_range_set(&rule->uid_range) && (!uid_eq(r->uid_range.start, rule->uid_range.start) || !uid_eq(r->uid_range.end, rule->uid_range.end))) continue; if (rule->ip_proto && r->ip_proto != rule->ip_proto) continue; if (rule->proto && r->proto != rule->proto) continue; if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; return r; } return NULL; } #ifdef CONFIG_NET_L3_MASTER_DEV static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { nlrule->l3mdev = nla_get_u8(nla); if (nlrule->l3mdev != 1) { NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute"); return -1; } return 0; } #else static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel"); return -1; } #endif static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; if (frh->src_len) if (!tb[FRA_SRC] || frh->src_len > (ops->addr_size * 8) || nla_len(tb[FRA_SRC]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid source address"); goto errout; } if (frh->dst_len) if (!tb[FRA_DST] || frh->dst_len > (ops->addr_size * 8) || nla_len(tb[FRA_DST]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid dst address"); goto errout; } nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (!nlrule) { err = -ENOMEM; goto errout; } refcount_set(&nlrule->refcnt, 1); nlrule->fr_net = net; if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; } else { nlrule->pref = fib_default_rule_pref(ops); } nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC); if (tb[FRA_IIFNAME]) { struct net_device *dev; nlrule->iifindex = -1; nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->iifname); if (dev) nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { struct net_device *dev; nlrule->oifindex = -1; nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->oifname); if (dev) nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { nlrule->mark = nla_get_u32(tb[FRA_FWMARK]); if (nlrule->mark) /* compatibility: if the mark value is non-zero all bits * are compared unless a mask is explicitly specified. */ nlrule->mark_mask = 0xFFFFFFFF; } if (tb[FRA_FWMASK]) nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); if (tb[FRA_TUN_ID]) nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); if (tb[FRA_L3MDEV] && fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0) goto errout_free; nlrule->action = frh->action; nlrule->flags = frh->flags; nlrule->table = frh_get_table(frh, tb); if (tb[FRA_SUPPRESS_PREFIXLEN]) nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); else nlrule->suppress_prefixlen = -1; if (tb[FRA_SUPPRESS_IFGROUP]) nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); else nlrule->suppress_ifgroup = -1; if (tb[FRA_GOTO]) { if (nlrule->action != FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Unexpected goto"); goto errout_free; } nlrule->target = nla_get_u32(tb[FRA_GOTO]); /* Backward jumps are prohibited to avoid endless loops */ if (nlrule->target <= nlrule->pref) { NL_SET_ERR_MSG(extack, "Backward goto not supported"); goto errout_free; } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; } if (nlrule->l3mdev && nlrule->table) { NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive"); goto errout_free; } if (tb[FRA_UID_RANGE]) { if (current_user_ns() != net->user_ns) { err = -EPERM; NL_SET_ERR_MSG(extack, "No permission to set uid"); goto errout_free; } nlrule->uid_range = nla_get_kuid_range(tb); if (!uid_range_set(&nlrule->uid_range) || !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) { NL_SET_ERR_MSG(extack, "Invalid uid range"); goto errout_free; } } else { nlrule->uid_r